diff --git a/.env.example b/.env.example index de8b2bb..50141b6 100644 --- a/.env.example +++ b/.env.example @@ -49,6 +49,18 @@ BOT_STATUS=for mentions # Number of messages to remember per user (higher = more context, more tokens) MAX_CONVERSATION_HISTORY=20 +# =========================================== +# Web Search (SearXNG) +# =========================================== +# SearXNG instance URL for web search (enables the bot to access current information) +SEARXNG_URL=https://search.example.com + +# Enable/disable web search capability (true/false) +SEARXNG_ENABLED=true + +# Maximum number of search results to fetch (1-20) +SEARXNG_MAX_RESULTS=5 + # =========================================== # Logging # =========================================== diff --git a/CLAUDE.md b/CLAUDE.md index be9931f..10808b4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -39,11 +39,21 @@ Cogs are auto-loaded by `bot.py` from the `cogs/` directory. ### Configuration All config flows through `config.py` using pydantic-settings. The `settings` singleton is created at module load, so env vars must be set before importing. 
+### Web Search +The bot can search the web for current information via SearXNG: +- `services/searxng.py` provides `SearXNGService` for web queries +- `ai_chat.py` uses a two-step approach: it first asks the AI whether a search is needed, then passes any search results to the final response as context +- Search is triggered automatically when the AI determines the query needs current information +- Configured via `SEARXNG_URL`, `SEARXNG_ENABLED`, and `SEARXNG_MAX_RESULTS` env vars + ### Key Design Decisions - `ConversationManager` stores per-user chat history in memory with configurable max length - Long AI responses are split via `split_message()` in `ai_chat.py` to respect Discord's 2000 char limit - The bot responds only to @mentions via `on_message` listener +- Web search uses the AI to decide when to search, avoiding unnecessary SearXNG requests for general-knowledge questions (at the cost of one extra AI call per message while search is enabled) ## Environment Variables Required: `DISCORD_TOKEN`, plus one of `OPENAI_API_KEY`, `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, or `GEMINI_API_KEY` depending on `AI_PROVIDER` setting. + +Optional: `SEARXNG_URL` to enable web search (tuned via `SEARXNG_ENABLED` and `SEARXNG_MAX_RESULTS`). diff --git a/README.md b/README.md index 1b05460..e6896e3 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ A customizable Discord bot that responds to @mentions with AI-generated response ## Features - **Multi-Provider AI**: Supports OpenAI, OpenRouter, Anthropic (Claude), and Google Gemini +- **Web Search**: Access current information via SearXNG integration - **Fully Customizable**: Configure bot name, personality, and behavior - **Conversation Memory**: Remembers context per user - **Easy Deployment**: Docker support included @@ -75,6 +76,16 @@ All configuration is done via environment variables in `.env`. 
| `AI_TEMPERATURE` | `0.7` | Response creativity (0.0-2.0) | | `MAX_CONVERSATION_HISTORY` | `20` | Messages to remember per user | +### Web Search (SearXNG) + +| Variable | Default | Description | +|----------|---------|-------------| +| `SEARXNG_URL` | (none) | SearXNG instance URL | +| `SEARXNG_ENABLED` | `true` | Enable/disable web search | +| `SEARXNG_MAX_RESULTS` | `5` | Max search results to fetch | + +When configured, the bot automatically searches the web for queries that need current information (news, weather, etc.). + ### Example Configurations **Friendly Assistant:** @@ -143,7 +154,8 @@ src/daemon_boyfriend/ └── services/ ├── ai_service.py # AI provider factory ├── providers/ # AI providers - └── conversation.py # Chat history + ├── conversation.py # Chat history + └── searxng.py # Web search service ``` ## License diff --git a/src/daemon_boyfriend/cogs/ai_chat.py b/src/daemon_boyfriend/cogs/ai_chat.py index 14c386d..cf93d35 100644 --- a/src/daemon_boyfriend/cogs/ai_chat.py +++ b/src/daemon_boyfriend/cogs/ai_chat.py @@ -7,7 +7,7 @@ import discord from discord.ext import commands from daemon_boyfriend.config import settings -from daemon_boyfriend.services import AIService, ConversationManager, Message +from daemon_boyfriend.services import AIService, ConversationManager, Message, SearXNGService logger = logging.getLogger(__name__) @@ -71,6 +71,9 @@ class AIChatCog(commands.Cog): self.bot = bot self.ai_service = AIService() self.conversations = ConversationManager() + self.search_service: SearXNGService | None = None + if settings.searxng_enabled and settings.searxng_url: + self.search_service = SearXNGService(settings.searxng_url) @commands.Cog.listener() async def on_message(self, message: discord.Message) -> None: @@ -138,10 +141,23 @@ class AIChatCog(commands.Cog): # Add current message to history for the API call messages = history + [Message(role="user", content=user_message)] + # Check if we should search the web + search_context = await 
self._maybe_search(user_message) + + # Build system prompt with search context if available + system_prompt = self.ai_service.get_system_prompt() + if search_context: + system_prompt += ( + "\n\n--- Web Search Results ---\n" + "Use the following current information from the web to help answer the user's question. " + "Cite sources when relevant.\n\n" + f"{search_context}" + ) + # Generate response response = await self.ai_service.chat( messages=messages, - system_prompt=self.ai_service.get_system_prompt(), + system_prompt=system_prompt, ) # Save the exchange to history @@ -154,6 +170,64 @@ class AIChatCog(commands.Cog): return response.content + async def _maybe_search(self, query: str) -> str | None: + """Determine if a search is needed and perform it. + + Args: + query: The user's message + + Returns: + Formatted search results or None if search not needed/available + """ + if not self.search_service: + return None + + # Ask the AI if this query needs current information + decision_prompt = ( + "You are a search decision assistant. Your ONLY job is to decide if the user's " + "question requires current/real-time information from the internet.\n\n" + "Respond with ONLY 'SEARCH: ' if a web search would help answer the question " + "(replace with optimal search terms), or 'NO_SEARCH' if the question can be " + "answered with general knowledge.\n\n" + "Examples that NEED search:\n" + "- Current events, news, recent happenings\n" + "- Current weather, stock prices, sports scores\n" + "- Latest version of software, current documentation\n" + "- Information about specific people, companies, or products that may have changed\n" + "- 'What time is it in Tokyo?' 
or any real-time data\n\n" + "Examples that DON'T need search:\n" + "- General knowledge, science, math, history\n" + "- Coding help, programming concepts\n" + "- Personal advice, opinions, creative writing\n" + "- Explanations of concepts or 'how does X work'" + ) + + try: + decision = await self.ai_service.chat( + messages=[Message(role="user", content=query)], + system_prompt=decision_prompt, + ) + + response_text = decision.content.strip() + + if response_text.startswith("SEARCH:"): + search_query = response_text[7:].strip() + logger.info(f"AI decided to search for: {search_query}") + + results = await self.search_service.search( + query=search_query, + max_results=settings.searxng_max_results, + ) + + if results: + return self.search_service.format_results_for_context(results) + + return None + + except Exception as e: + logger.warning(f"Search decision/execution failed: {e}") + return None + async def setup(bot: commands.Bot) -> None: """Load the AI Chat cog.""" diff --git a/src/daemon_boyfriend/config.py b/src/daemon_boyfriend/config.py index 0c19893..580ee29 100644 --- a/src/daemon_boyfriend/config.py +++ b/src/daemon_boyfriend/config.py @@ -61,6 +61,11 @@ class Settings(BaseSettings): 20, description="Max messages to keep in conversation memory per user" ) + # SearXNG Configuration + searxng_url: str | None = Field(None, description="SearXNG instance URL for web search") + searxng_enabled: bool = Field(True, description="Enable web search capability") + searxng_max_results: int = Field(5, ge=1, le=20, description="Maximum search results to fetch") + def get_api_key(self) -> str: """Get the API key for the configured provider.""" key_map = { diff --git a/src/daemon_boyfriend/services/__init__.py b/src/daemon_boyfriend/services/__init__.py index e00f057..23ae523 100644 --- a/src/daemon_boyfriend/services/__init__.py +++ b/src/daemon_boyfriend/services/__init__.py @@ -3,10 +3,12 @@ from .ai_service import AIService from .conversation import 
@dataclass
class SearchResult:
    """A single search result."""

    title: str  # Result title ("" when SearXNG omits it)
    url: str  # Link to the result page
    content: str  # Text snippet/summary for the result


class SearXNGService:
    """Service for searching the web via SearXNG.

    Talks to the instance's JSON API (``GET {base_url}/search?format=json``).
    All failures (HTTP errors, timeouts, malformed payloads) are logged and
    reported to the caller as an empty result list rather than raised.
    """

    def __init__(self, base_url: str, timeout: int = 10) -> None:
        """Initialize the SearXNG service.

        Args:
            base_url: The base URL of the SearXNG instance (trailing slashes
                are removed so the endpoint path can be appended safely)
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    async def search(
        self,
        query: str,
        categories: str = "general",
        max_results: int = 5,
    ) -> list[SearchResult]:
        """Search the web using SearXNG.

        Args:
            query: The search query
            categories: Search categories (general, images, news, etc.)
            max_results: Maximum number of results to return

        Returns:
            List of search results; empty on a non-200 response, network
            error, timeout, or unparseable response body.
        """
        # Local import keeps this block self-contained; needed because
        # asyncio.TimeoutError is only an alias of the builtin TimeoutError
        # from Python 3.11 onwards.
        import asyncio

        url = f"{self.base_url}/search"
        params = {
            "q": query,
            "format": "json",
            "categories": categories,
        }

        logger.debug("Searching SearXNG for: %s", query)

        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    url,
                    params=params,
                    timeout=aiohttp.ClientTimeout(total=self.timeout),
                ) as response:
                    if response.status != 200:
                        logger.error("SearXNG returned status %s", response.status)
                        return []

                    data = await response.json()
        except aiohttp.ClientError as e:
            logger.error("SearXNG request failed: %s", e)
            return []
        except (asyncio.TimeoutError, TimeoutError):
            logger.error("SearXNG request timed out")
            return []
        except ValueError as e:
            # json.JSONDecodeError is a ValueError: the body was not valid JSON.
            logger.error("SearXNG returned invalid JSON: %s", e)
            return []

        results = self._parse_results(data, max_results)
        logger.debug("SearXNG returned %d results", len(results))
        return results

    @staticmethod
    def _parse_results(data: object, max_results: int) -> list[SearchResult]:
        """Convert a decoded SearXNG JSON payload into SearchResult objects.

        Args:
            data: Decoded JSON body; expected to be a dict with a "results" list
            max_results: Maximum number of results to keep (values below 1
                yield an empty list)

        Returns:
            Up to ``max_results`` results. Malformed entries are skipped and
            missing or null fields become empty strings.
        """
        if not isinstance(data, dict):
            return []

        raw_items = data.get("results")
        if not isinstance(raw_items, list):
            return []

        # Clamp so a negative limit cannot slice from the wrong end.
        limit = max(max_results, 0)
        usable = [item for item in raw_items if isinstance(item, dict)]

        return [
            SearchResult(
                # "or ''" also covers JSON null, which .get() would return as
                # None and which would otherwise render as the text "None".
                title=str(item.get("title") or ""),
                url=str(item.get("url") or ""),
                content=str(item.get("content") or ""),
            )
            for item in usable[:limit]
        ]

    def format_results_for_context(self, results: list[SearchResult]) -> str:
        """Format search results as context for the AI.

        Args:
            results: List of search results

        Returns:
            Formatted string with numbered results (title, URL, snippet)
            separated by blank lines, or a fixed notice when there are none
        """
        if not results:
            return "No search results found."

        return "\n\n".join(
            f"[{i}] {result.title}\n URL: {result.url}\n {result.content}"
            for i, result in enumerate(results, 1)
        )