#!/usr/bin/env python3
"""
Blog Text-to-Speech generator.

Usage:
    python scripts/tts_generate.py src/content/blog/my-post.md

Environment variables:
    TTS_PROVIDER    - "google" (default), "mistral", or "openai"
    GOOGLE_API_KEY  - Required when TTS_PROVIDER=google
    MISTRAL_API_KEY - Required when TTS_PROVIDER=mistral
    OPENAI_API_KEY  - Required when TTS_PROVIDER=openai

Output:
    <slug>.mp3 in the current working directory (or --output-dir if specified)
"""

import argparse
import base64
import json
import os
import re
import sys
import urllib.request
from typing import NoReturn

# Socket timeout (seconds) for provider requests, so a stalled connection
# cannot hang the script forever.
_REQUEST_TIMEOUT_SECONDS = 300


def parse_frontmatter(text: str) -> tuple[dict, str]:
    """Extract simple frontmatter and return ``(metadata_dict, body)``.

    Only flat ``key: value`` lines are understood; this is not a YAML parser.
    Each line is split at its first colon, and surrounding whitespace and
    quote characters are stripped from the value.

    Args:
        text: Full file contents, optionally starting with a ``---`` block.

    Returns:
        ``(meta, body)``. When *text* does not start with ``---`` or has no
        closing ``---`` line, ``meta`` is empty and ``body`` is *text*
        unchanged.
    """
    if not text.startswith("---"):
        return {}, text
    end = text.find("\n---", 3)
    if end == -1:
        return {}, text

    front = text[3:end].strip()
    # Skip the 4 characters of the closing "\n---" delimiter.
    body = text[end + 4:].strip()

    meta: dict = {}
    for line in front.splitlines():
        if ":" not in line:
            continue
        key, _, value = line.partition(":")
        meta[key.strip()] = value.strip().strip('"').strip("'")
    return meta, body


def clean_markdown(text: str) -> str:
    """Strip markdown syntax so TTS reads clean prose.

    Fenced code blocks, inline code and images are dropped entirely; links,
    headings, emphasis, blockquotes and list items keep their text.

    Args:
        text: Markdown body (without frontmatter).

    Returns:
        The text with markdown markers removed, runs of three or more
        newlines collapsed to one blank line, and outer whitespace stripped.
    """
    # Remove fenced code blocks (``` ... ```)
    text = re.sub(r"```[\s\S]*?```", "", text)
    # Remove inline code
    text = re.sub(r"`[^`]+`", "", text)
    # Remove images
    text = re.sub(r"!\[.*?\]\(.*?\)", "", text)
    # Convert links to just the link text
    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
    # Remove ATX heading markers but keep the text. [ \t] rather than \s so
    # a match can never swallow a newline and merge two lines.
    text = re.sub(r"^#{1,6}[ \t]+", "", text, flags=re.MULTILINE)
    # Remove horizontal rules. This runs before list/emphasis handling so a
    # "***" rule is not mistaken for emphasis delimiters.
    text = re.sub(r"^[-*_]{3,}[ \t]*$", "", text, flags=re.MULTILINE)
    # Remove blockquote markers, including nested ones (">> text")
    text = re.sub(r"^(?:>[ \t]?)+", "", text, flags=re.MULTILINE)
    # Remove list markers (optionally indented). This must happen before
    # emphasis stripping, otherwise "*" bullets pair up as emphasis.
    text = re.sub(r"^[ \t]*[*\-+][ \t]+", "", text, flags=re.MULTILINE)
    text = re.sub(r"^[ \t]*\d+\.[ \t]+", "", text, flags=re.MULTILINE)
    # Remove bold/italic markers. Emphasis never spans lines here, and
    # underscores inside words (snake_case) are left alone.
    text = re.sub(r"\*{1,3}([^*\n]+)\*{1,3}", r"\1", text)
    text = re.sub(r"(?<!\w)_{1,3}([^_\n]+)_{1,3}(?!\w)", r"\1", text)
    # Collapse multiple blank lines
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()


def _require_api_key(env_var: str) -> str:
    """Return the value of *env_var*, raising EnvironmentError if unset/empty."""
    api_key = os.environ.get(env_var)
    if not api_key:
        raise EnvironmentError(f"{env_var} is not set")
    return api_key


def _post_json(url: str, payload: dict, extra_headers: dict) -> bytes:
    """POST *payload* as JSON to *url* and return the raw response body."""
    headers = {"Content-Type": "application/json"}
    headers.update(extra_headers)
    req = urllib.request.Request(url, data=json.dumps(payload).encode(), headers=headers)
    with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT_SECONDS) as resp:
        return resp.read()


def _write_bytes(output_path: str, data: bytes) -> None:
    """Write *data* to *output_path*, replacing any existing file."""
    with open(output_path, "wb") as f:
        f.write(data)


def _bearer_speech(url: str, api_key: str, payload: dict, output_path: str) -> None:
    """POST *payload* with a Bearer token and save the response body as audio."""
    audio_bytes = _post_json(url, payload, {"Authorization": f"Bearer {api_key}"})
    _write_bytes(output_path, audio_bytes)


def tts_google(text: str, slug: str, output_path: str) -> None:
    """Generate audio with Google Cloud TTS (free tier: 1M chars/month).

    Args:
        text: Plain text to synthesize.
        slug: Post slug; unused here, kept for the shared provider signature.
        output_path: Destination path of the MP3 file.

    Raises:
        EnvironmentError: If GOOGLE_API_KEY is not set.
    """
    api_key = _require_api_key("GOOGLE_API_KEY")
    payload = {
        "input": {"text": text},
        "voice": {
            "languageCode": "nl-NL",
            "name": "nl-NL-Wavenet-D",
            "ssmlGender": "FEMALE",
        },
        "audioConfig": {"audioEncoding": "MP3"},
    }
    # NOTE(review): the API documents a per-request input size limit
    # (believed to be 5000 bytes; confirm). Longer posts are sent as one
    # request and may be rejected.
    url = f"https://texttospeech.googleapis.com/v1/text:synthesize?key={api_key}"
    body = json.loads(_post_json(url, payload, {}))
    # The response is JSON; the audio arrives base64-encoded.
    _write_bytes(output_path, base64.b64decode(body["audioContent"]))


def tts_mistral(text: str, slug: str, output_path: str) -> None:
    """Generate audio with Mistral Voxtral TTS (~$16/M chars).

    Args:
        text: Plain text to synthesize.
        slug: Post slug; unused here, kept for the shared provider signature.
        output_path: Destination path of the audio file.

    Raises:
        EnvironmentError: If MISTRAL_API_KEY is not set.
    """
    api_key = _require_api_key("MISTRAL_API_KEY")
    payload = {
        "model": "voxtral-mini-tts-2507",
        "input": text,
        "voice": "river",
    }
    _bearer_speech("https://api.mistral.ai/v1/audio/speech", api_key, payload, output_path)


def tts_openai(text: str, slug: str, output_path: str) -> None:
    """Generate audio with OpenAI TTS-1 (~$15/M chars).

    Args:
        text: Plain text to synthesize.
        slug: Post slug; unused here, kept for the shared provider signature.
        output_path: Destination path of the audio file.

    Raises:
        EnvironmentError: If OPENAI_API_KEY is not set.
    """
    api_key = _require_api_key("OPENAI_API_KEY")
    payload = {
        "model": "tts-1",
        "input": text,
        "voice": "nova",
    }
    # NOTE(review): the speech endpoint documents a maximum input length
    # (believed to be 4096 characters; confirm). Longer posts may be rejected.
    _bearer_speech("https://api.openai.com/v1/audio/speech", api_key, payload, output_path)


# Provider name (value of TTS_PROVIDER) -> generator function.
PROVIDERS = {
    "google": tts_google,
    "mistral": tts_mistral,
    "openai": tts_openai,
}


def _fail(message: str) -> NoReturn:
    """Print ``ERROR: <message>`` to stderr and exit with status 1."""
    print(f"ERROR: {message}", file=sys.stderr)
    sys.exit(1)


def main() -> None:
    """Command-line entry point: read a post, clean it, and synthesize audio.

    Exits with status 1 when the input file is missing, the post has no
    speakable text, TTS_PROVIDER is unknown, the output directory cannot be
    created, or the provider call fails.
    """
    parser = argparse.ArgumentParser(description="Generate TTS audio for a blog post")
    parser.add_argument("file", help="Path to the .md blog post")
    parser.add_argument("--output-dir", default=".", help="Directory to write the .mp3 (default: .)")
    args = parser.parse_args()

    md_path = args.file
    if not os.path.isfile(md_path):
        _fail(f"file not found: {md_path}")

    with open(md_path, encoding="utf-8") as f:
        raw = f.read()

    meta, body = parse_frontmatter(raw)
    # Fall back to the file name (without extension) when no slug is given.
    slug = meta.get("slug") or os.path.splitext(os.path.basename(md_path))[0]
    title = meta.get("title", "")

    # Prepend title so TTS reads it aloud
    prose = clean_markdown(body)
    full_text = f"{title}.\n\n{prose}" if title else prose
    if not full_text.strip():
        _fail(f"no text to synthesize in: {md_path}")

    provider_name = os.environ.get("TTS_PROVIDER", "google").lower()
    if provider_name not in PROVIDERS:
        _fail(f"unknown TTS_PROVIDER '{provider_name}'. Choose from: {', '.join(PROVIDERS)}")

    # Create the output directory up front, so a missing directory fails
    # before the remote request is made rather than after.
    if args.output_dir:
        try:
            os.makedirs(args.output_dir, exist_ok=True)
        except OSError as exc:
            _fail(f"cannot create output directory '{args.output_dir}': {exc}")

    output_path = os.path.join(args.output_dir, f"{slug}.mp3")
    print(f"Generating audio for '{slug}' using provider '{provider_name}' ({len(full_text)} chars)...")

    try:
        PROVIDERS[provider_name](full_text, slug, output_path)
    except Exception as exc:  # top-level boundary: report and exit non-zero
        _fail(f"TTS generation failed: {exc}")

    print(f"Saved: {output_path}")


if __name__ == "__main__":
    main()