first audio thing that i want to try

2026-04-05 15:21:26 +02:00
parent bb625f82af
commit 00cff1eb7e
5 changed files with 302 additions and 0 deletions
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+"""
+Blog Text-to-Speech generator.
+
+Usage:
+    python scripts/tts_generate.py src/content/blog/my-post.md [--output-dir DIR]
+
+Environment variables:
+    TTS_PROVIDER   - "google" (default), "mistral", or "openai"
+    GOOGLE_API_KEY - Required when TTS_PROVIDER=google
+    MISTRAL_API_KEY - Required when TTS_PROVIDER=mistral
+    OPENAI_API_KEY  - Required when TTS_PROVIDER=openai
+
+Output:
+    <slug>.mp3 in the current working directory (or --output-dir if specified)
+"""
+
+import argparse
+import os
+import re
+import sys
+
+
+def parse_frontmatter(text: str) -> tuple[dict, str]:
+    """Split a Markdown document into frontmatter metadata and body.
+
+    This is a deliberately small parser, not a YAML implementation: it only
+    understands flat, top-level ``key: value`` pairs.
+
+    Args:
+        text: Full contents of the Markdown file.
+
+    Returns:
+        A ``(metadata, body)`` tuple. ``metadata`` maps every top-level key
+        to its value as a string, with one matching pair of surrounding
+        quotes removed. ``body`` is everything after the closing ``---``
+        fence, stripped of surrounding whitespace. When the opening or the
+        closing fence is missing the result is ``({}, text)`` and the text
+        is not stripped.
+    """
+    # A byte-order mark in front of the first fence would hide the frontmatter.
+    if text.startswith("\ufeff"):
+        text = text[1:]
+
+    lines = text.splitlines(keepends=True)
+    if not lines or lines[0].strip() != "---":
+        return {}, text
+
+    # The closing fence has to be a line of its own. A substring search would
+    # also stop at lines such as "----" and cut the frontmatter short.
+    closing = next(
+        (i for i, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
+        None,
+    )
+    if closing is None:
+        return {}, text
+
+    meta: dict = {}
+    for raw_line in lines[1:closing]:
+        entry = raw_line.strip()
+        # Skip blanks, comments and nested content (indented lines and list
+        # items): a nested "title:" must not overwrite the top-level one.
+        if not entry or entry[0] in "#-" or raw_line[0] in " \t":
+            continue
+        key, sep, value = entry.partition(":")
+        if not sep:
+            continue
+        value = value.strip()
+        # Remove exactly one matching pair of quotes. Stripping quote
+        # characters blindly would also eat a legitimate trailing apostrophe.
+        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
+            value = value[1:-1]
+        meta[key.strip()] = value
+
+    body = "".join(lines[closing + 1:]).strip()
+    return meta, body
+
+
+def clean_markdown(text: str) -> str:
+    """Strip Markdown syntax so a TTS engine reads plain prose.
+
+    Fenced code blocks, inline code spans and images are removed entirely;
+    links are reduced to their link text. Heading, blockquote, list and
+    horizontal-rule markers and emphasis markers are dropped while the text
+    they decorate is kept. Runs of blank lines are collapsed to one.
+
+    Args:
+        text: Markdown body (without frontmatter).
+
+    Returns:
+        The cleaned text with leading and trailing whitespace removed.
+    """
+    # --- Elements that are dropped or reduced to their visible text ---
+    # Fenced code blocks (``` ... ```)
+    text = re.sub(r"```[\s\S]*?```", "", text)
+    # Inline code
+    text = re.sub(r"`[^`]+`", "", text)
+    # Images (must run before links, which share the bracket syntax)
+    text = re.sub(r"!\[.*?\]\(.*?\)", "", text)
+    # Links: keep only the link text
+    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
+
+    # --- Block-level markers ---
+    # These run before the emphasis patterns so that "*" bullets are treated
+    # as list markers instead of being consumed as emphasis. Only spaces and
+    # tabs are matched after a marker: "\s" would also swallow the newline of
+    # an otherwise empty line and merge two paragraphs.
+    # Blockquote markers, including nested ones (">> text")
+    text = re.sub(r"^(?:>[ \t]?)+", "", text, flags=re.MULTILINE)
+    # ATX heading markers, text is kept
+    text = re.sub(r"^#{1,6}[ \t]+", "", text, flags=re.MULTILINE)
+    # Horizontal rules
+    text = re.sub(r"^[-*_]{3,}[ \t]*$", "", text, flags=re.MULTILINE)
+    # Bullet and numbered list markers, also when indented (nested lists)
+    text = re.sub(r"^[ \t]*[-*+][ \t]+", "", text, flags=re.MULTILINE)
+    text = re.sub(r"^[ \t]*\d+\.[ \t]+", "", text, flags=re.MULTILINE)
+
+    # --- Inline emphasis ---
+    # Emphasis never spans lines, so newlines are excluded from the match.
+    text = re.sub(r"\*{1,3}([^*\n]+)\*{1,3}", r"\1", text)
+    # Underscores inside a word (snake_case, file names) are not emphasis.
+    text = re.sub(r"(?<!\w)_{1,3}([^_\n]+)_{1,3}(?!\w)", r"\1", text)
+
+    # Collapse multiple blank lines
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
+
+
+def tts_google(text: str, slug: str, output_path: str) -> None:
+    """Generate audio with Google Cloud TTS (free tier: 1M chars/month).
+
+    The text is synthesized with the Dutch voice ``nl-NL-Wavenet-D`` and
+    written to ``output_path`` as MP3. Because the API limits the input of a
+    single request to 5000 bytes, longer text is split at sentence
+    boundaries, synthesized piece by piece, and the MP3 data is concatenated.
+
+    Args:
+        text: Plain text to read aloud.
+        slug: Post identifier; unused here, kept so that all providers share
+            one signature.
+        output_path: Destination path of the MP3 file.
+
+    Raises:
+        EnvironmentError: ``GOOGLE_API_KEY`` is not set.
+        ValueError: ``text`` contains nothing but whitespace.
+        RuntimeError: A response did not contain ``audioContent``.
+        urllib.error.URLError: The HTTP request failed.
+    """
+    import base64
+    import json
+    import urllib.request
+
+    api_key = os.environ.get("GOOGLE_API_KEY")
+    if not api_key:
+        raise EnvironmentError("GOOGLE_API_KEY is not set")
+
+    # Stay safely below the documented 5000-byte limit per request.
+    max_bytes = 4500
+
+    def size(part: str) -> int:
+        """Return the UTF-8 size of ``part``; the API limit counts bytes."""
+        return len(part.encode("utf-8"))
+
+    def split_text(source: str) -> list[str]:
+        """Cut ``source`` into pieces of at most ``max_bytes`` bytes each."""
+        tokens: list[str] = []
+        # Prefer sentence boundaries so the voice is not cut mid-sentence.
+        # The capturing group keeps the separators, so no text is lost.
+        for sentence in re.split(r"((?<=[.!?])\s+)", source):
+            if size(sentence) > max_bytes:
+                # A single oversized sentence falls back to word boundaries.
+                tokens.extend(re.split(r"(\s+)", sentence))
+            else:
+                tokens.append(sentence)
+
+        pieces: list[str] = []
+        current = ""
+        for token in tokens:
+            if current.strip() and size(current + token) > max_bytes:
+                pieces.append(current.strip())
+                current = ""
+            current += token
+        if current.strip():
+            pieces.append(current.strip())
+        return pieces
+
+    pieces = split_text(text)
+    if not pieces:
+        raise ValueError("no text to synthesize")
+
+    url = "https://texttospeech.googleapis.com/v1/text:synthesize"
+    headers = {
+        "Content-Type": "application/json",
+        # Send the key as a header: a key in the query string ends up in
+        # proxy and server logs.
+        "X-Goog-Api-Key": api_key,
+    }
+
+    audio_parts: list[bytes] = []
+    for piece in pieces:
+        payload = json.dumps({
+            "input": {"text": piece},
+            "voice": {
+                "languageCode": "nl-NL",
+                "name": "nl-NL-Wavenet-D",
+                "ssmlGender": "FEMALE",
+            },
+            "audioConfig": {"audioEncoding": "MP3"},
+        }).encode()
+        req = urllib.request.Request(url, data=payload, headers=headers)
+
+        # Without a timeout a stalled connection blocks the script forever.
+        with urllib.request.urlopen(req, timeout=300) as resp:
+            body = json.loads(resp.read())
+
+        if "audioContent" not in body:
+            raise RuntimeError("Google TTS response contained no audioContent")
+        audio_parts.append(base64.b64decode(body["audioContent"]))
+
+    # NOTE(review): the pieces are joined by plain concatenation of the MP3
+    # data, which players generally accept; confirm with a long post.
+    with open(output_path, "wb") as f:
+        f.write(b"".join(audio_parts))
+
+
+def tts_mistral(text: str, slug: str, output_path: str) -> None:
+    """Generate audio with Mistral Voxtral TTS (~$16/M chars).
+
+    Posts the text to ``https://api.mistral.ai/v1/audio/speech`` using model
+    ``voxtral-mini-tts-2507`` and voice ``river`` and writes the response
+    body to ``output_path`` unchanged.
+
+    Args:
+        text: Plain text to read aloud.
+        slug: Post identifier; unused here, kept so that all providers share
+            one signature.
+        output_path: Destination path of the audio file.
+
+    Raises:
+        EnvironmentError: ``MISTRAL_API_KEY`` is not set.
+        RuntimeError: The API answered with an empty body.
+        urllib.error.URLError: The HTTP request failed.
+    """
+    import json
+    import urllib.request
+
+    api_key = os.environ.get("MISTRAL_API_KEY")
+    if not api_key:
+        raise EnvironmentError("MISTRAL_API_KEY is not set")
+
+    payload = json.dumps({
+        "model": "voxtral-mini-tts-2507",
+        "input": text,
+        "voice": "river",
+    }).encode()
+
+    url = "https://api.mistral.ai/v1/audio/speech"
+    req = urllib.request.Request(
+        url,
+        data=payload,
+        headers={
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {api_key}",
+        },
+    )
+
+    # Without a timeout a stalled connection blocks the script forever.
+    with urllib.request.urlopen(req, timeout=300) as resp:
+        audio_bytes = resp.read()
+
+    # An empty body would otherwise be saved as a zero-byte "mp3" and be
+    # reported as success.
+    if not audio_bytes:
+        raise RuntimeError("Mistral TTS returned an empty response")
+
+    with open(output_path, "wb") as f:
+        f.write(audio_bytes)
+
+
+def tts_openai(text: str, slug: str, output_path: str) -> None:
+    """Generate audio with OpenAI TTS-1 (~$15/M chars).
+
+    The text is synthesized with model ``tts-1`` and voice ``nova`` and
+    written to ``output_path`` as MP3. Because the API limits the input of a
+    single request to 4096 characters, longer text is split at sentence
+    boundaries, synthesized piece by piece, and the MP3 data is concatenated.
+
+    Args:
+        text: Plain text to read aloud.
+        slug: Post identifier; unused here, kept so that all providers share
+            one signature.
+        output_path: Destination path of the MP3 file.
+
+    Raises:
+        EnvironmentError: ``OPENAI_API_KEY`` is not set.
+        ValueError: ``text`` contains nothing but whitespace.
+        urllib.error.URLError: The HTTP request failed.
+    """
+    import json
+    import urllib.request
+
+    api_key = os.environ.get("OPENAI_API_KEY")
+    if not api_key:
+        raise EnvironmentError("OPENAI_API_KEY is not set")
+
+    # Stay safely below the documented 4096-character limit per request.
+    max_chars = 4000
+
+    def split_text(source: str) -> list[str]:
+        """Cut ``source`` into pieces of at most ``max_chars`` characters."""
+        tokens: list[str] = []
+        # Prefer sentence boundaries so the voice is not cut mid-sentence.
+        # The capturing group keeps the separators, so no text is lost.
+        for sentence in re.split(r"((?<=[.!?])\s+)", source):
+            if len(sentence) > max_chars:
+                # A single oversized sentence falls back to word boundaries.
+                tokens.extend(re.split(r"(\s+)", sentence))
+            else:
+                tokens.append(sentence)
+
+        pieces: list[str] = []
+        current = ""
+        for token in tokens:
+            if current.strip() and len(current + token) > max_chars:
+                pieces.append(current.strip())
+                current = ""
+            current += token
+        if current.strip():
+            pieces.append(current.strip())
+        return pieces
+
+    pieces = split_text(text)
+    if not pieces:
+        raise ValueError("no text to synthesize")
+
+    url = "https://api.openai.com/v1/audio/speech"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}",
+    }
+
+    audio_parts: list[bytes] = []
+    for piece in pieces:
+        payload = json.dumps({
+            "model": "tts-1",
+            "input": piece,
+            "voice": "nova",
+            # Stated explicitly because the output file is named ".mp3".
+            "response_format": "mp3",
+        }).encode()
+        req = urllib.request.Request(url, data=payload, headers=headers)
+
+        # Without a timeout a stalled connection blocks the script forever.
+        with urllib.request.urlopen(req, timeout=300) as resp:
+            audio_parts.append(resp.read())
+
+    # NOTE(review): the pieces are joined by plain concatenation of the MP3
+    # data, which players generally accept; confirm with a long post.
+    with open(output_path, "wb") as f:
+        f.write(b"".join(audio_parts))
+
+
+# Registry of TTS backends: each accepted TTS_PROVIDER value mapped to the
+# function that generates the audio for it.
+PROVIDERS = dict(
+    google=tts_google,
+    mistral=tts_mistral,
+    openai=tts_openai,
+)
+
+
+def main() -> None:
+    """Command-line entry point: turn one Markdown blog post into an MP3.
+
+    Reads the post given on the command line, takes ``title`` and ``slug``
+    from its frontmatter (the slug falls back to the file name), cleans the
+    Markdown body and hands the text to the provider selected through the
+    ``TTS_PROVIDER`` environment variable (default ``"google"``). The audio
+    is written to ``<output-dir>/<slug>.mp3``.
+
+    Exits with status 1 when the input file does not exist, the post has no
+    readable text, the provider is unknown, or the generation fails.
+    """
+    parser = argparse.ArgumentParser(description="Generate TTS audio for a blog post")
+    parser.add_argument("file", help="Path to the .md blog post")
+    parser.add_argument("--output-dir", default=".", help="Directory to write the .mp3 (default: .)")
+    args = parser.parse_args()
+
+    md_path = args.file
+    if not os.path.isfile(md_path):
+        print(f"ERROR: file not found: {md_path}", file=sys.stderr)
+        sys.exit(1)
+
+    # "utf-8-sig" reads plain UTF-8 as well and drops a byte-order mark, which
+    # would otherwise sit in front of the frontmatter fence.
+    with open(md_path, encoding="utf-8-sig") as f:
+        raw = f.read()
+
+    meta, body = parse_frontmatter(raw)
+
+    slug = meta.get("slug") or os.path.splitext(os.path.basename(md_path))[0]
+    title = meta.get("title", "")
+
+    # Prepend title so TTS reads it aloud
+    spoken_body = clean_markdown(body)
+    if title:
+        # End the title with a period so it is read as its own sentence, but
+        # do not produce "?." when it already ends with punctuation.
+        heading = title if title.endswith((".", "!", "?")) else f"{title}."
+        full_text = f"{heading}\n\n{spoken_body}"
+    else:
+        full_text = spoken_body
+
+    # Do not spend an API call on a post that has nothing to read.
+    if not full_text.strip():
+        print(f"ERROR: no readable text in: {md_path}", file=sys.stderr)
+        sys.exit(1)
+
+    provider_name = os.environ.get("TTS_PROVIDER", "google").lower()
+    if provider_name not in PROVIDERS:
+        print(f"ERROR: unknown TTS_PROVIDER '{provider_name}'. Choose from: {', '.join(PROVIDERS)}", file=sys.stderr)
+        sys.exit(1)
+
+    output_path = os.path.join(args.output_dir, f"{slug}.mp3")
+
+    print(f"Generating audio for '{slug}' using provider '{provider_name}' ({len(full_text)} chars)...")
+
+    # Top-level boundary: every failure is reported as one line on stderr and
+    # turned into a non-zero exit status.
+    try:
+        # Create the target directory first; otherwise a missing --output-dir
+        # only fails after the audio has already been generated and paid for.
+        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
+        PROVIDERS[provider_name](full_text, slug, output_path)
+    except Exception as exc:
+        print(f"ERROR: TTS generation failed: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"Saved: {output_path}")
+
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()