YouTube transcripts are valuable input for summarization models, RAG pipelines, content analysis, and accessibility tools. YouTube's official data API does not expose transcripts directly — developers typically resort to parsing auto-generated captions from internal endpoints, which break without notice. The Scavio transcript endpoint accepts a video_id and returns the full timestamped transcript as structured JSON. This tutorial shows how to call the endpoint, handle the response, and reassemble the text for downstream processing.
Prerequisites
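- Python 3.9 or higher installed (the example uses built-in generic type hints such as list[dict], which raise a TypeError on Python 3.8)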
- requests library installed
- A Scavio API key
- A YouTube video ID to test with (e.g. dQw4w9WgXcQ)
Walkthrough
Step 1: Identify the video ID
The video ID is the 11-character string after v= in a YouTube URL. For https://youtube.com/watch?v=dQw4w9WgXcQ the ID is dQw4w9WgXcQ.
from urllib.parse import urlparse, parse_qs
def extract_video_id(url: str) -> str:
    """Return the YouTube video ID contained in *url*.

    Recognised forms:

    * ``https://youtube.com/watch?v=<id>`` (any host, via the ``v`` query
      parameter)
    * ``https://youtu.be/<id>``
    * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>`` paths
      on ``youtube.com`` / ``youtube-nocookie.com`` and their subdomains

    Args:
        url: A YouTube URL, or a bare video ID.

    Returns:
        The extracted video ID. Input that matches none of the forms above
        (including a bare video ID) is returned unchanged.
    """
    parsed = urlparse(url)

    # Standard watch URL: the ID is the first value of the "v" parameter.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or "").lower()
    if host.startswith("www."):
        host = host[len("www."):]
    path_parts = [part for part in parsed.path.split("/") if part]

    # Short links carry the ID as the first path segment.
    if host == "youtu.be" and path_parts:
        return path_parts[0]

    # Shorts, embeds and live pages carry the ID as the second path segment.
    youtube_hosts = ("youtube.com", "youtube-nocookie.com")
    is_youtube = host in youtube_hosts or host.endswith(
        tuple("." + name for name in youtube_hosts)
    )
    id_prefixes = ("shorts", "embed", "live", "v")
    if is_youtube and len(path_parts) >= 2 and path_parts[0] in id_prefixes:
        return path_parts[1]

    # No URL form recognised: treat the input as an ID and pass it through.
    return url


# Step 2: Call the transcript endpoint
POST to the Scavio endpoint with platform youtube, action transcript, and the video_id.
import requests
# NOTE: API_KEY must already hold your Scavio API key; this snippet does not
# define it.
response = requests.post(
    "https://api.scavio.dev/api/v1/search",
    headers={"x-api-key": API_KEY},
    json={"platform": "youtube", "action": "transcript", "video_id": "dQw4w9WgXcQ"},
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Surface 4xx/5xx responses as exceptions instead of parsing an error body
# as if it were a transcript.
response.raise_for_status()
data = response.json()
Step 3: Reassemble the transcript text
The transcript key contains a list of segments with start, duration, and text. Join them to produce a plain-text document.
# An absent "transcript" key yields an empty list, which joins to "".
segments = data.get("transcript", [])
# Concatenate the caption text of every segment, separated by single spaces.
full_text = " ".join([segment["text"] for segment in segments])
print(full_text[:500])
Step 4: Save to file for downstream use
Write the transcript to disk so it can be ingested by a vector store or summarization model.
# Write the reassembled transcript as UTF-8 text; "w" mode replaces any
# existing transcript.txt.
with open("transcript.txt", mode="w", encoding="utf-8") as f:
    f.write(full_text)
print(f"Saved {len(segments)} segments, {len(full_text)} characters")
Python Example
import os
import requests
API_KEY = os.environ.get("SCAVIO_API_KEY", "your_scavio_api_key")
ENDPOINT = "https://api.scavio.dev/api/v1/search"
def get_transcript(video_id: str, timeout: float = 30.0) -> list[dict]:
    """Fetch the transcript segments for a YouTube video from the Scavio API.

    Args:
        video_id: YouTube video ID, e.g. ``"dQw4w9WgXcQ"``.
        timeout: Seconds to wait for the server before giving up. Without
            one, ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The list stored under the ``"transcript"`` key of the JSON response,
        or an empty list when that key is absent.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.post(
        ENDPOINT,
        headers={"x-api-key": API_KEY},
        json={"platform": "youtube", "action": "transcript", "video_id": video_id},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors rather than treating an error body as data.
    response.raise_for_status()
    return response.json().get("transcript", [])
def segments_to_text(segments: list[dict]) -> str:
    """Join the ``text`` of each transcript segment into one plain string.

    Segments are joined in order with single spaces. A segment whose text is
    missing, ``None`` or whitespace-only is skipped, so it neither raises
    ``KeyError`` nor leaves doubled spaces in the result. Surrounding
    whitespace on each segment's text is stripped.

    Args:
        segments: Transcript segments, each a dict that normally carries a
            ``"text"`` entry.

    Returns:
        The concatenated text, or ``""`` when there is nothing to join.
    """
    cleaned = (str(seg.get("text") or "").strip() for seg in segments)
    return " ".join(piece for piece in cleaned if piece)
def main(video_id: str = "dQw4w9WgXcQ") -> None:
    """Fetch a transcript and print its size plus a 300-character preview.

    Args:
        video_id: YouTube video ID to fetch. Defaults to the tutorial's
            sample video, so calling ``main()`` with no arguments behaves as
            before.
    """
    segments = get_transcript(video_id)
    text = segments_to_text(segments)
    print(f"Transcript: {len(segments)} segments, {len(text)} chars")
    print(text[:300])
if __name__ == "__main__":
    main()
JavaScript Example
const API_KEY = process.env.SCAVIO_API_KEY || "your_scavio_api_key";
const ENDPOINT = "https://api.scavio.dev/api/v1/search";
/**
 * Fetch the transcript segments for a YouTube video from the Scavio API.
 *
 * @param {string} videoId - YouTube video ID, e.g. "dQw4w9WgXcQ".
 * @param {number} [timeoutMs=30000] - Milliseconds to wait before aborting
 *   the request; fetch has no timeout of its own.
 * @returns {Promise<Array<object>>} The `transcript` array from the JSON
 *   response, or an empty array when it is missing or falsy.
 * @throws {Error} With message `HTTP <status>` when the response is not ok.
 */
async function getTranscript(videoId, timeoutMs = 30000) {
  const response = await fetch(ENDPOINT, {
    method: "POST",
    headers: { "x-api-key": API_KEY, "Content-Type": "application/json" },
    body: JSON.stringify({ platform: "youtube", action: "transcript", video_id: videoId }),
    // Abort instead of hanging forever if the API never answers.
    signal: AbortSignal.timeout(timeoutMs),
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const data = await response.json();
  return data.transcript || [];
}
/**
 * Join the `text` of each transcript segment into one plain string.
 *
 * Segments are joined in order with single spaces. A segment whose text is
 * missing, null or whitespace-only is skipped so it leaves no doubled
 * spaces; surrounding whitespace on each segment's text is trimmed.
 *
 * @param {Array<object>} segments - Transcript segments.
 * @returns {string} The concatenated text, or "" when there is nothing to join.
 */
function segmentsToText(segments) {
  return segments
    .map((segment) => String(segment?.text ?? "").trim())
    .filter((text) => text.length > 0)
    .join(" ");
}
/**
 * Fetch a transcript and log its size plus a 300-character preview.
 *
 * @param {string} [videoId="dQw4w9WgXcQ"] - YouTube video ID to fetch;
 *   defaults to the tutorial's sample video so `main()` behaves as before.
 * @returns {Promise<void>}
 */
async function main(videoId = "dQw4w9WgXcQ") {
  const segments = await getTranscript(videoId);
  const text = segmentsToText(segments);
  console.log(`${segments.length} segments, ${text.length} chars`);
  console.log(text.slice(0, 300));
}
main().catch(console.error);
Expected Output
{
"video_id": "dQw4w9WgXcQ",
"language": "en",
"transcript": [
{ "start": 0.0, "duration": 3.2, "text": "We're no strangers to love" },
{ "start": 3.2, "duration": 2.8, "text": "You know the rules and so do I" },
{ "start": 6.0, "duration": 3.5, "text": "A full commitment's what I'm thinking of" }
]
}