Reddit's official API requires OAuth registration, has strict rate limits, and recently introduced paid tiers that make large-scale data collection expensive. The Scavio API provides an alternative: query Reddit posts and comments through a single authenticated endpoint that returns structured JSON with subreddit, author, score, timestamps, and full comment trees. This tutorial shows how to search Reddit posts, fetch individual threads with comments, and build a data collection pipeline without touching the official Reddit API.
Prerequisites
- Python 3.8 or higher
- requests library installed
- A Scavio API key
- Topics or subreddits you want to collect data from
Walkthrough
Step 1: Search Reddit posts by keyword
Use the Scavio Reddit search endpoint to find posts matching a keyword. Results include post title, subreddit, author, score, and timestamp.
import os
import requests
API_KEY = os.environ["SCAVIO_API_KEY"]
def search_reddit(query: str, sort: str = "relevance") -> list[dict]:
r = requests.post(
"https://api.scavio.dev/api/v1/reddit/search",
headers={"Authorization": f"Bearer {API_KEY}"},
json={"query": query, "sort": sort},
timeout=30
)
r.raise_for_status()
return r.json()["data"]["posts"]Step 2: Fetch a full post with comments
Given a Reddit post URL, fetch the complete post plus all threaded comments in a single call.
def fetch_thread(url: str) -> dict:
r = requests.post(
"https://api.scavio.dev/api/v1/reddit/post",
headers={"Authorization": f"Bearer {API_KEY}"},
json={"url": url},
timeout=30
)
r.raise_for_status()
return r.json()["data"]Step 3: Extract structured data from results
Parse posts and comments into a flat, analysis-ready format with relevant fields extracted.
def extract_post_data(post: dict) -> dict:
    """Project a raw post dict onto the analysis-relevant fields.

    Missing keys map to ``None`` (via ``dict.get``); extra keys are dropped.
    """
    wanted = ("id", "title", "subreddit", "author", "score", "timestamp", "url")
    return {field: post.get(field) for field in wanted}
def extract_comments(thread: dict) -> list[dict]:
return [{
"author": c.get("author"),
"body": c.get("body"),
"score": c.get("score"),
"depth": c.get("depth"),
} for c in thread.get("comments", [])]Step 4: Export to JSON
Save the collected posts and comments as a structured JSON dataset.
import json
def collect_dataset(query: str, max_threads: int = 5) -> dict:
    """Search for posts matching *query* and collect up to *max_threads*
    full threads (post metadata plus flattened comments)."""
    found = search_reddit(query, sort="new")
    threads = [
        {
            "post": extract_post_data(entry),
            "comments": extract_comments(fetch_thread(entry["url"])),
        }
        for entry in found[:max_threads]
    ]
    return {"query": query, "threads": threads}
with open("reddit_data.json", "w") as f:
json.dump(collect_dataset("python web frameworks"), f, indent=2)Python Example
import os
import json
import requests
API_KEY = os.environ["SCAVIO_API_KEY"]
def search_reddit(query: str) -> list[dict]:
    """Search Reddit via Scavio, newest posts first; raises on HTTP errors."""
    resp = requests.post(
        "https://api.scavio.dev/api/v1/reddit/search",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={"query": query, "sort": "new"},
        timeout=30,
    )
    resp.raise_for_status()
    payload = resp.json()
    return payload["data"]["posts"]
def fetch_thread(url: str) -> dict:
    """Fetch a full post (with comments) by URL; raises on HTTP errors."""
    resp = requests.post(
        "https://api.scavio.dev/api/v1/reddit/post",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={"url": url},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()["data"]
if __name__ == "__main__":
posts = search_reddit("best python frameworks 2026")
print(f"Found {len(posts)} posts")
for p in posts[:3]:
print(f" r/{p['subreddit']}: {p['title']}")
thread = fetch_thread(p["url"])
comments = thread.get("comments", [])
print(f" {len(comments)} comments")JavaScript Example
const API_KEY = process.env.SCAVIO_API_KEY;
// Search Reddit posts via the Scavio API, newest first.
// Checks r.ok before parsing: without it a 4xx/5xx error body is parsed
// as data and crashes later on `.data.posts` (the Python version guards
// this with raise_for_status; this mirrors that behavior).
async function searchReddit(query) {
  const r = await fetch("https://api.scavio.dev/api/v1/reddit/search", {
    method: "POST",
    headers: { Authorization: `Bearer ${API_KEY}`, "Content-Type": "application/json" },
    body: JSON.stringify({ query, sort: "new" })
  });
  if (!r.ok) throw new Error(`Scavio search failed: HTTP ${r.status}`);
  return (await r.json()).data.posts;
}
// Fetch a full Reddit post plus its comment tree by post URL.
// Throws on non-2xx responses instead of silently parsing an error body
// as data (matches the Python version's raise_for_status).
async function fetchThread(url) {
  const r = await fetch("https://api.scavio.dev/api/v1/reddit/post", {
    method: "POST",
    headers: { Authorization: `Bearer ${API_KEY}`, "Content-Type": "application/json" },
    body: JSON.stringify({ url })
  });
  if (!r.ok) throw new Error(`Scavio post fetch failed: HTTP ${r.status}`);
  return (await r.json()).data;
}
async function main() {
const posts = await searchReddit("best python frameworks 2026");
for (const p of posts.slice(0, 3)) {
console.log(`r/${p.subreddit}: ${p.title}`);
const thread = await fetchThread(p.url);
console.log(` ${thread.comments?.length || 0} comments`);
}
}
main().catch(console.error);Expected Output
Found 14 posts
r/Python: FastAPI vs Django for new projects in 2026
47 comments
r/webdev: What Python framework are you using this year?
23 comments
r/learnpython: Best framework to learn as a beginner?
18 comments