216 lines
6.3 KiB
Python
216 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Blog Text-to-Speech generator.
|
|
|
|
Usage:
|
|
python scripts/tts_generate.py src/content/blog/my-post.md
|
|
|
|
Environment variables:
|
|
TTS_PROVIDER - "google" (default), "mistral", or "openai"
|
|
GOOGLE_API_KEY - Required when TTS_PROVIDER=google
|
|
MISTRAL_API_KEY - Required when TTS_PROVIDER=mistral
|
|
OPENAI_API_KEY - Required when TTS_PROVIDER=openai
|
|
|
|
Output:
|
|
<slug>.mp3 in the current working directory (or --output-dir if specified)
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
|
|
def parse_frontmatter(text: str) -> tuple[dict, str]:
    """Split a markdown document into frontmatter metadata and body.

    Only a flat subset of YAML is understood: every frontmatter line that
    contains a colon is read as ``key: value`` (split at the first colon).
    Nested mappings, lists and multi-line values are not interpreted.

    Args:
        text: Full contents of the markdown file.

    Returns:
        A ``(metadata, body)`` tuple. ``metadata`` maps each key to its
        string value with one pair of matching surrounding quotes removed.
        ``body`` is everything after the closing ``---`` with surrounding
        whitespace stripped. If ``text`` does not start with ``---`` or has
        no closing delimiter, ``({}, text)`` is returned with ``text``
        untouched.
    """
    if not text.startswith("---"):
        return {}, text

    # Start searching after the opening delimiter so it is never mistaken
    # for the closing one.
    end = text.find("\n---", 3)
    if end == -1:
        return {}, text

    front = text[3:end].strip()
    # Skip the four characters of the closing "\n---" itself.
    body = text[end + 4:].strip()

    meta: dict = {}
    for line in front.splitlines():
        key, sep, value = line.partition(":")
        if not sep:
            continue
        value = value.strip()
        # Remove exactly one pair of matching quotes. Stripping every quote
        # character from both ends would also eat quotes that belong to the
        # value, e.g. the apostrophe at the end of "De 'beste'".
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        meta[key.strip()] = value

    return meta, body
|
|
|
|
|
|
def clean_markdown(text: str) -> str:
    """Strip markdown syntax so TTS reads clean prose.

    Fenced code blocks, inline code and images are dropped entirely; links
    are reduced to their link text; heading, horizontal-rule, blockquote,
    list and emphasis markers are removed while the surrounding text is kept.

    Line-level markers (rules, blockquotes, list bullets) are removed
    *before* emphasis markers. Otherwise a ``* `` bullet or a ``***`` rule
    is paired with a later ``*`` by the emphasis pattern, leaving stray
    asterisks and spaces in the spoken text.

    Args:
        text: Markdown body (without frontmatter).

    Returns:
        The cleaned text with runs of three or more newlines collapsed to a
        single blank line and surrounding whitespace stripped.
    """
    # Remove code blocks (``` ... ```)
    text = re.sub(r"```[\s\S]*?```", "", text)
    # Remove inline code
    text = re.sub(r"`[^`]+`", "", text)
    # Remove images (must run before links, which share the [..](..) form)
    text = re.sub(r"!\[.*?\]\(.*?\)", "", text)
    # Convert links to just the link text
    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
    # Remove ATX heading markers but keep text
    text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
    # Remove horizontal rules
    text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE)
    # Remove blockquote markers
    text = re.sub(r"^>\s?", "", text, flags=re.MULTILINE)
    # Remove list markers, including indented (nested) ones. Only spaces and
    # tabs are allowed before the marker so the match cannot swallow the
    # blank lines that separate paragraphs.
    text = re.sub(r"^[ \t]*[*\-+]\s+", "", text, flags=re.MULTILINE)
    text = re.sub(r"^[ \t]*\d+\.\s+", "", text, flags=re.MULTILINE)
    # Remove bold/italic markers
    text = re.sub(r"\*{1,3}([^*]+)\*{1,3}", r"\1", text)
    # Underscores only count as emphasis at word boundaries, so identifiers
    # such as my_var_name keep their underscores instead of being fused.
    text = re.sub(r"(?<!\w)_{1,3}([^_]+)_{1,3}(?!\w)", r"\1", text)
    # Collapse multiple blank lines
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
|
|
|
|
|
|
def tts_google(text: str, slug: str, output_path: str) -> None:
    """Synthesize ``text`` with Google Cloud TTS and write it to ``output_path``.

    The original author notes a free tier of 1M chars/month for this
    provider. The request uses the ``nl-NL-Wavenet-D`` voice and asks for
    MP3 encoding; the base64 ``audioContent`` field of the JSON reply is
    decoded and written as binary.

    Args:
        text: Plain text to be spoken.
        slug: Post slug; accepted for a uniform provider signature but not
            used by this function.
        output_path: File path the decoded audio bytes are written to.

    Raises:
        EnvironmentError: If ``GOOGLE_API_KEY`` is unset or empty.
    """
    import base64
    import json
    import urllib.request

    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise EnvironmentError("GOOGLE_API_KEY is not set")

    voice = {
        "languageCode": "nl-NL",
        "name": "nl-NL-Wavenet-D",
        "ssmlGender": "FEMALE",
    }
    request_body = {
        "input": {"text": text},
        "voice": voice,
        "audioConfig": {"audioEncoding": "MP3"},
    }

    # The API key travels as a query parameter on the endpoint URL.
    request = urllib.request.Request(
        f"https://texttospeech.googleapis.com/v1/text:synthesize?key={api_key}",
        data=json.dumps(request_body).encode(),
        headers={"Content-Type": "application/json"},
    )

    with urllib.request.urlopen(request) as response:
        reply = json.loads(response.read())

    # Decode before opening the file so a malformed reply never leaves an
    # empty output file behind.
    decoded_audio = base64.b64decode(reply["audioContent"])
    with open(output_path, "wb") as out_file:
        out_file.write(decoded_audio)
|
|
|
|
|
|
def tts_mistral(text: str, slug: str, output_path: str) -> None:
    """Synthesize ``text`` with Mistral Voxtral TTS and write it to ``output_path``.

    The original author notes a price of roughly $16 per million characters.
    The raw HTTP response body is written to disk unchanged.

    Args:
        text: Plain text to be spoken.
        slug: Post slug; accepted for a uniform provider signature but not
            used by this function.
        output_path: File path the response bytes are written to.

    Raises:
        EnvironmentError: If ``MISTRAL_API_KEY`` is unset or empty.
    """
    import json
    import urllib.request

    api_key = os.environ.get("MISTRAL_API_KEY")
    if not api_key:
        raise EnvironmentError("MISTRAL_API_KEY is not set")

    request_body = {
        "model": "voxtral-mini-tts-2507",
        "input": text,
        "voice": "river",
    }
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    request = urllib.request.Request(
        "https://api.mistral.ai/v1/audio/speech",
        data=json.dumps(request_body).encode(),
        headers=request_headers,
    )

    # Read the complete response first; the output file is only opened once
    # the download has succeeded.
    with urllib.request.urlopen(request) as response:
        received = response.read()

    with open(output_path, "wb") as out_file:
        out_file.write(received)
|
|
|
|
|
|
def tts_openai(text: str, slug: str, output_path: str) -> None:
    """Synthesize ``text`` with OpenAI ``tts-1`` and write it to ``output_path``.

    The original author notes a price of roughly $15 per million characters.
    The raw HTTP response body is written to disk unchanged.

    Args:
        text: Plain text to be spoken.
        slug: Post slug; accepted for a uniform provider signature but not
            used by this function.
        output_path: File path the response bytes are written to.

    Raises:
        EnvironmentError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    import json
    import urllib.request

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise EnvironmentError("OPENAI_API_KEY is not set")

    request_body = {
        "model": "tts-1",
        "input": text,
        "voice": "nova",
    }
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    request = urllib.request.Request(
        "https://api.openai.com/v1/audio/speech",
        data=json.dumps(request_body).encode(),
        headers=request_headers,
    )

    # Read the complete response first; the output file is only opened once
    # the download has succeeded.
    with urllib.request.urlopen(request) as response:
        received = response.read()

    with open(output_path, "wb") as out_file:
        out_file.write(received)
|
|
|
|
|
|
# Registry of supported backends: maps each accepted TTS_PROVIDER value to
# the function that generates the audio. All entries share the signature
# (text, slug, output_path).
PROVIDERS = dict(
    google=tts_google,
    mistral=tts_mistral,
    openai=tts_openai,
)
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: turn one markdown blog post into an audio file.

    Reads the post given on the command line, derives the slug (frontmatter
    ``slug`` or the file name without extension) and the optional title,
    cleans the markdown body, and hands the text to the provider selected
    by the ``TTS_PROVIDER`` environment variable (default ``google``). The
    result is written to ``<output-dir>/<slug>.mp3``.

    All cheap validation (input file, non-empty text, provider name, output
    directory) happens before the provider is called, so a billable API
    request is never made when its result could not be saved.

    Exits with status 1 and a message on stderr when the input file is
    missing, there is nothing to read aloud, the provider is unknown, the
    output directory cannot be created, or generation fails.
    """
    parser = argparse.ArgumentParser(description="Generate TTS audio for a blog post")
    parser.add_argument("file", help="Path to the .md blog post")
    parser.add_argument("--output-dir", default=".", help="Directory to write the .mp3 (default: .)")
    args = parser.parse_args()

    md_path = args.file
    if not os.path.isfile(md_path):
        print(f"ERROR: file not found: {md_path}", file=sys.stderr)
        sys.exit(1)

    with open(md_path, encoding="utf-8") as f:
        raw = f.read()

    meta, body = parse_frontmatter(raw)

    slug = meta.get("slug") or os.path.splitext(os.path.basename(md_path))[0]
    title = meta.get("title", "")

    # Prepend title so TTS reads it aloud. A full stop is only added when the
    # title does not already end a sentence, avoiding text like "Why?.".
    spoken_body = clean_markdown(body)
    if title:
        heading = title if title.endswith((".", "!", "?")) else f"{title}."
        full_text = f"{heading}\n\n{spoken_body}"
    else:
        full_text = spoken_body

    if not full_text.strip():
        print(f"ERROR: no readable text found in: {md_path}", file=sys.stderr)
        sys.exit(1)

    provider_name = os.environ.get("TTS_PROVIDER", "google").lower()
    if provider_name not in PROVIDERS:
        print(f"ERROR: unknown TTS_PROVIDER '{provider_name}'. Choose from: {', '.join(PROVIDERS)}", file=sys.stderr)
        sys.exit(1)

    # Create the output directory up front: discovering that it is missing
    # only after the provider call would waste the request. An empty
    # --output-dir means "current directory" and needs no creation.
    if args.output_dir:
        try:
            os.makedirs(args.output_dir, exist_ok=True)
        except OSError as exc:
            print(f"ERROR: cannot create output directory '{args.output_dir}': {exc}", file=sys.stderr)
            sys.exit(1)

    output_path = os.path.join(args.output_dir, f"{slug}.mp3")

    print(f"Generating audio for '{slug}' using provider '{provider_name}' ({len(full_text)} chars)...")

    # Top-level boundary: any provider failure (missing key, HTTP error,
    # write error) is reported as a single message and a non-zero exit.
    try:
        PROVIDERS[provider_name](full_text, slug, output_path)
    except Exception as exc:
        print(f"ERROR: TTS generation failed: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"Saved: {output_path}")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|