Miscellaneous code and documentation updates
This commit is contained in:
+198
-77
@@ -6,8 +6,8 @@ Usage:
|
||||
python scripts/tts_generate.py src/content/blog/my-post.md
|
||||
|
||||
Environment variables:
|
||||
TTS_PROVIDER - "google" (default), "mistral", or "openai"
|
||||
GOOGLE_API_KEY - Required when TTS_PROVIDER=google
|
||||
TTS_PROVIDER - "google" (default), "mistral", or "openai"
|
||||
GOOGLE_API_KEY - Required when TTS_PROVIDER=google
|
||||
MISTRAL_API_KEY - Required when TTS_PROVIDER=mistral
|
||||
OPENAI_API_KEY - Required when TTS_PROVIDER=openai
|
||||
|
||||
@@ -21,6 +21,23 @@ import re
|
||||
import sys
|
||||
|
||||
|
||||
def load_dotenv(env_path: "str | None" = None) -> None:
    """Load KEY=VALUE pairs from a .env file into ``os.environ``.

    Uses only the standard library. Variables that already exist in the
    environment are never overwritten.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an
    empty key are ignored. A value wrapped in one matching pair of single or
    double quotes has that pair removed; unbalanced quotes are kept as-is.

    Args:
        env_path: Path of the file to read. When omitted, ``.env`` in the
            project root is used, where the project root is the parent of the
            directory containing this script.
    """
    if env_path is None:
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        env_path = os.path.join(project_root, ".env")
    if not os.path.isfile(env_path):
        return

    # utf-8-sig also reads plain UTF-8 and drops a leading BOM, which would
    # otherwise become part of the first key.
    with open(env_path, encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            key, sep, value = line.partition("=")
            key = key.strip()
            # An empty variable name cannot be set in the environment.
            if not sep or not key:
                continue
            value = value.strip()
            # Only strip a balanced pair of quotes so values such as "it's"
            # keep their apostrophe.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            os.environ.setdefault(key, value)
|
||||
|
||||
|
||||
def parse_frontmatter(text: str) -> tuple[dict, str]:
|
||||
"""Extract YAML frontmatter and return (metadata_dict, body)."""
|
||||
if not text.startswith("---"):
|
||||
@@ -31,7 +48,7 @@ def parse_frontmatter(text: str) -> tuple[dict, str]:
|
||||
return {}, text
|
||||
|
||||
front = text[3:end].strip()
|
||||
body = text[end + 4:].strip()
|
||||
body = text[end + 4 :].strip()
|
||||
|
||||
meta: dict = {}
|
||||
for line in front.splitlines():
|
||||
@@ -69,99 +86,195 @@ def clean_markdown(text: str) -> str:
|
||||
return text.strip()
|
||||
|
||||
|
||||
def tts_google(text: str, slug: str, output_path: str) -> None:
|
||||
"""Generate audio with Google Cloud TTS (free tier: 1M chars/month)."""
|
||||
def split_into_chunks(text: str, max_chars: int = 4000) -> list[str]:
    """Split text into chunks of at most ``max_chars`` characters.

    Paragraphs (separated by blank lines) are kept whole when they fit.
    A paragraph that is too long is split into sentences, and a sentence that
    is still too long is wrapped on word boundaries (hard-cut only when a
    single word exceeds the limit). Segments are then merged greedily, joined
    by a blank line, so each chunk is as large as possible without exceeding
    ``max_chars``. No non-whitespace text is discarded.

    Args:
        text: The text to split.
        max_chars: Maximum length of each returned chunk; must be >= 1.

    Returns:
        The chunks in reading order; empty when ``text`` has no content.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    # Flatten into atomic segments, none longer than max_chars.
    segments: list[str] = []
    for para in text.split("\n\n"):
        para = para.strip()
        if not para:
            continue
        if len(para) <= max_chars:
            segments.append(para)
            continue
        for sentence in re.split(r"(?<=[.!?])\s+", para):
            sentence = sentence.strip()
            if sentence:
                segments.extend(_wrap_oversized_segment(sentence, max_chars))

    # Merge segments greedily into chunks under max_chars.
    chunks: list[str] = []
    current = ""
    for segment in segments:
        if not current:
            current = segment
        elif len(current) + 2 + len(segment) <= max_chars:
            current += "\n\n" + segment
        else:
            chunks.append(current)
            current = segment
    if current:
        chunks.append(current)

    return chunks


def _wrap_oversized_segment(segment: str, max_chars: int) -> list[str]:
    """Break one segment into pieces of at most ``max_chars`` characters."""
    if len(segment) <= max_chars:
        return [segment]

    pieces: list[str] = []
    current = ""
    for word in segment.split():
        # A single word longer than the limit has to be cut mid-word.
        while len(word) > max_chars:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:max_chars])
            word = word[max_chars:]
        if not word:
            continue
        if not current:
            current = word
        elif len(current) + 1 + len(word) <= max_chars:
            current += " " + word
        else:
            pieces.append(current)
            current = word
    if current:
        pieces.append(current)
    return pieces
|
||||
|
||||
|
||||
def merge_audio_chunks(
    chunks: list[bytes], output_path: str, pause_frames: int = 16
) -> None:
    """Write MP3 byte chunks to one file with a silent pause between them.

    The pause is built from a fixed 417-byte frame: the 4-byte header
    ``ff fb 90 00`` followed by 413 zero bytes. With the default of 16 frames
    at ~26.1 ms each this is roughly 418 ms of silence. No pause is written
    before the first or after the last chunk.

    NOTE(review): the frame header describes a 128 kbps / 44100 Hz stream. If
    a provider returns MP3 with a different sample rate or channel layout,
    some decoders may handle the inserted frames poorly -- confirm with real
    provider output.

    Args:
        chunks: Encoded MP3 data, in playback order. An empty list produces
            an empty file.
        output_path: Destination file; it is created or overwritten.
        pause_frames: Number of silent frames inserted between two chunks.

    Raises:
        ValueError: If ``pause_frames`` is negative.
    """
    if pause_frames < 0:
        raise ValueError("pause_frames must not be negative")

    silent_frame = b"\xff\xfb\x90\x00" + b"\x00" * 413  # 417 bytes
    silence = silent_frame * pause_frames

    with open(output_path, "wb") as f:
        # join() places the separator only between chunks, never at the ends.
        f.write(silence.join(chunks))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider helpers — return raw MP3 bytes for a single text chunk
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _google_synthesize(text: str, api_key: str) -> bytes:
    """Synthesize one text chunk with Google Cloud Text-to-Speech.

    Sends a single ``text:synthesize`` request for the Dutch voice
    ``nl-NL-Wavenet-D`` and asks for MP3 output.

    Args:
        text: The text of one chunk.
        api_key: Google API key used to authorize the request.

    Returns:
        The decoded MP3 bytes from the response's ``audioContent`` field.

    Raises:
        urllib.error.HTTPError: If the API answers with an error status.
        KeyError: If the response has no ``audioContent`` field.
    """
    import base64
    import json
    import urllib.request

    payload = json.dumps(
        {
            "input": {"text": text},
            "voice": {
                "languageCode": "nl-NL",
                "name": "nl-NL-Wavenet-D",
                "ssmlGender": "MALE",
            },
            "audioConfig": {"audioEncoding": "MP3"},
        }
    ).encode()

    # The key travels in a header rather than the query string so it does not
    # end up in URLs that get logged or attached to error objects.
    req = urllib.request.Request(
        "https://texttospeech.googleapis.com/v1/text:synthesize",
        data=payload,
        headers={
            "Content-Type": "application/json",
            "X-Goog-Api-Key": api_key,
        },
    )
    with urllib.request.urlopen(req) as resp:
        body = json.loads(resp.read())

    # The API returns the audio base64-encoded inside the JSON document.
    return base64.b64decode(body["audioContent"])
|
||||
|
||||
|
||||
def _mistral_synthesize(text: str, api_key: str) -> bytes:
|
||||
import base64
|
||||
import json
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
body: dict = {
|
||||
"model": "voxtral-mini-tts-2603",
|
||||
"input": text,
|
||||
"response_format": "mp3",
|
||||
}
|
||||
voice_id = os.environ.get("MISTRAL_VOICE_ID", "").strip()
|
||||
if not voice_id:
|
||||
raise EnvironmentError(
|
||||
"MISTRAL_VOICE_ID is not set. "
|
||||
"Create a voice at https://console.mistral.ai and add its ID to .env"
|
||||
)
|
||||
body["voice"] = voice_id
|
||||
|
||||
req = urllib.request.Request(
|
||||
"https://api.mistral.ai/v1/audio/speech",
|
||||
data=json.dumps(body).encode(),
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
},
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(req) as resp:
|
||||
data = json.loads(resp.read())
|
||||
return base64.b64decode(data["audio_data"])
|
||||
except urllib.error.HTTPError as exc:
|
||||
detail = exc.read().decode("utf-8", errors="replace")
|
||||
raise RuntimeError(f"Mistral API {exc.code}: {detail}") from exc
|
||||
|
||||
|
||||
def _openai_synthesize(text: str, api_key: str) -> bytes:
    """Synthesize one text chunk through the OpenAI speech endpoint.

    Requests the ``gpt-4o-mini-tts`` model with the ``ash`` voice and a
    fixed tone instruction.

    Args:
        text: The text of one chunk.
        api_key: OpenAI API key, sent as a bearer token.

    Returns:
        The raw response body, which is the audio itself.
    """
    import json
    import urllib.request

    request_fields = {
        "model": "gpt-4o-mini-tts",
        "input": text,
        "voice": "ash",
        "instructions": "Read aloud in a warm and friendly tone.",
    }
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    request = urllib.request.Request(
        "https://api.openai.com/v1/audio/speech",
        data=json.dumps(request_fields).encode(),
        headers=request_headers,
    )

    # Unlike the other providers, the body is the audio bytes, not JSON.
    with urllib.request.urlopen(request) as response:
        audio = response.read()
    return audio
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public provider functions — chunk, call API per chunk, merge
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def tts_google(text: str, slug: str, output_path: str) -> None:
    """Generate audio for ``text`` with Google Cloud TTS and write an MP3.

    The text is split into chunks, each chunk is synthesized separately, and
    the resulting audio is merged into ``output_path``. The text is sent to
    the API exactly once: there is no additional whole-text request.

    Args:
        text: The full text to read aloud.
        slug: Post identifier; accepted for a uniform provider signature and
            not used here.
        output_path: Destination of the merged MP3 file.

    Raises:
        EnvironmentError: If ``GOOGLE_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise EnvironmentError("GOOGLE_API_KEY is not set")

    chunks = split_into_chunks(text)
    audio_chunks: list[bytes] = []
    for i, chunk in enumerate(chunks, 1):
        print(f" Chunk {i}/{len(chunks)} ({len(chunk)} chars)...")
        audio_chunks.append(_google_synthesize(chunk, api_key))
    merge_audio_chunks(audio_chunks, output_path)
|
||||
|
||||
|
||||
def tts_mistral(text: str, slug: str, output_path: str) -> None:
    """Generate audio for ``text`` with Mistral TTS and write an MP3.

    The text is split into chunks, each chunk is synthesized separately, and
    the resulting audio is merged into ``output_path``. The text is sent to
    the API exactly once: there is no additional whole-text request.

    Args:
        text: The full text to read aloud.
        slug: Post identifier; accepted for a uniform provider signature and
            not used here.
        output_path: Destination of the merged MP3 file.

    Raises:
        EnvironmentError: If ``MISTRAL_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("MISTRAL_API_KEY")
    if not api_key:
        raise EnvironmentError("MISTRAL_API_KEY is not set")

    chunks = split_into_chunks(text)
    audio_chunks: list[bytes] = []
    for i, chunk in enumerate(chunks, 1):
        print(f" Chunk {i}/{len(chunks)} ({len(chunk)} chars)...")
        audio_chunks.append(_mistral_synthesize(chunk, api_key))
    merge_audio_chunks(audio_chunks, output_path)
|
||||
|
||||
|
||||
def tts_openai(text: str, slug: str, output_path: str) -> None:
    """Generate audio for ``text`` with OpenAI TTS and write an MP3.

    The text is split into chunks, each chunk is synthesized separately, and
    the resulting audio is merged into ``output_path``. The text is sent to
    the API exactly once: there is no additional whole-text request.

    Args:
        text: The full text to read aloud.
        slug: Post identifier; accepted for a uniform provider signature and
            not used here.
        output_path: Destination of the merged MP3 file.

    Raises:
        EnvironmentError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise EnvironmentError("OPENAI_API_KEY is not set")

    chunks = split_into_chunks(text)
    audio_chunks: list[bytes] = []
    for i, chunk in enumerate(chunks, 1):
        print(f" Chunk {i}/{len(chunks)} ({len(chunk)} chars)...")
        audio_chunks.append(_openai_synthesize(chunk, api_key))
    merge_audio_chunks(audio_chunks, output_path)
|
||||
|
||||
|
||||
PROVIDERS = {
|
||||
@@ -172,9 +285,13 @@ PROVIDERS = {
|
||||
|
||||
|
||||
def main() -> None:
|
||||
load_dotenv()
|
||||
|
||||
parser = argparse.ArgumentParser(description="Generate TTS audio for a blog post")
|
||||
parser.add_argument("file", help="Path to the .md blog post")
|
||||
parser.add_argument("--output-dir", default=".", help="Directory to write the .mp3 (default: .)")
|
||||
parser.add_argument(
|
||||
"--output-dir", default=".", help="Directory to write the .mp3 (default: .)"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
md_path = args.file
|
||||
@@ -190,17 +307,21 @@ def main() -> None:
|
||||
slug = meta.get("slug") or os.path.splitext(os.path.basename(md_path))[0]
|
||||
title = meta.get("title", "")
|
||||
|
||||
# Prepend title so TTS reads it aloud
|
||||
full_text = f"{title}.\n\n{clean_markdown(body)}" if title else clean_markdown(body)
|
||||
|
||||
provider_name = os.environ.get("TTS_PROVIDER", "google").lower()
|
||||
if provider_name not in PROVIDERS:
|
||||
print(f"ERROR: unknown TTS_PROVIDER '{provider_name}'. Choose from: {', '.join(PROVIDERS)}", file=sys.stderr)
|
||||
print(
|
||||
f"ERROR: unknown TTS_PROVIDER '{provider_name}'. Choose from: {', '.join(PROVIDERS)}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
output_path = os.path.join(args.output_dir, f"{slug}.mp3")
|
||||
|
||||
print(f"Generating audio for '{slug}' using provider '{provider_name}' ({len(full_text)} chars)...")
|
||||
print(
|
||||
f"Generating audio for '{slug}' using provider '{provider_name}' ({len(full_text)} chars)..."
|
||||
)
|
||||
|
||||
try:
|
||||
PROVIDERS[provider_name](full_text, slug, output_path)
|
||||
|
||||
Reference in New Issue
Block a user