Karpathy's LLM Wiki project sparked discussion on r/AI_Agents about ingestion pipelines. Most wiki builders use separate tools for Google, Reddit, YouTube, and Amazon data. This tutorial replaces them all with a single API that covers multiple platforms, reducing integration complexity from four SDKs to one.
Prerequisites
- Scavio API key
- Python 3.8+
- Markdown-based wiki (git repo or filesystem)
Walkthrough
Step 1: Define wiki topic research function
One function searches all platforms for a topic.
import requests, os
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
URL = 'https://api.scavio.dev/api/v1/search'
def research_topic(topic):
sources = {}
for platform in ['google', 'reddit', 'youtube']:
data = requests.post(URL, headers=H,
json={'platform': platform, 'query': topic}).json()
sources[platform] = data.get('results', []) or data.get('organic_results', [])
return sourcesStep 2: Generate wiki entry from multi-platform data
Combine search results into a structured wiki page.
def generate_wiki_entry(topic, sources):
entry = f'# {topic}\n\n'
entry += '## Overview\n'
# Use top Google results for the overview
for r in sources.get('google', [])[:3]:
entry += f"- [{r.get('title', '')}]({r.get('link', '')}): {r.get('snippet', '')}\n"
entry += '\n## Community Discussion\n'
for r in sources.get('reddit', [])[:3]:
entry += f"- [{r.get('title', '')}]({r.get('url', '')})\n"
entry += '\n## Video Resources\n'
for r in sources.get('youtube', [])[:3]:
entry += f"- {r.get('title', '')}\n"
return entryStep 3: Build the wiki in batch
Process a list of topics and save as markdown files.
import os
def build_wiki(topics, output_dir='wiki'):
os.makedirs(output_dir, exist_ok=True)
for topic in topics:
slug = topic.lower().replace(' ', '-')
sources = research_topic(topic)
entry = generate_wiki_entry(topic, sources)
with open(f'{output_dir}/{slug}.md', 'w') as f:
f.write(entry)
print(f'Built wiki page: {slug}.md')
topics = ['transformer architecture', 'RLHF training', 'RAG pipeline', 'MCP protocol']
build_wiki(topics)Step 4: Add freshness checks
Re-research topics that are older than 7 days.
import datetime
def needs_refresh(filepath, max_age_days=7):
if not os.path.exists(filepath):
return True
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(filepath))
return (datetime.datetime.now() - mtime).days > max_age_days
def refresh_wiki(topics, output_dir='wiki'):
for topic in topics:
slug = topic.lower().replace(' ', '-')
path = f'{output_dir}/{slug}.md'
if needs_refresh(path):
sources = research_topic(topic)
entry = generate_wiki_entry(topic, sources)
with open(path, 'w') as f:
f.write(entry)
print(f'Refreshed: {slug}')Python Example
import os, requests
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
def wiki_page(topic):
data = {}
for p in ['google', 'reddit', 'youtube']:
r = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
json={'platform': p, 'query': topic}).json()
data[p] = r.get('results', []) or r.get('organic_results', [])
return data
# 4 topics x 3 platforms = 12 queries = $0.06JavaScript Example
const platforms = ['google', 'reddit', 'youtube'];
const sources = {};
for (const platform of platforms) {
const res = await fetch('https://api.scavio.dev/api/v1/search', {
method: 'POST',
headers: {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'},
body: JSON.stringify({platform, query: topic})
});
sources[platform] = await res.json();
}Expected Output
Markdown wiki with multi-platform sources per topic. One API key replaces separate Google, Reddit, and YouTube ingestion tools. 3 queries per topic = $0.015.