A news aggregator that combines Google News articles, Reddit discussions, and YouTube videos for any topic gives a more complete picture than any single source. Google News provides editorial coverage, Reddit surfaces community reactions, and YouTube captures video commentary. This tutorial builds a multi-source aggregator using the Scavio API that queries all three platforms, normalizes the results into a common format, deduplicates by URL, and ranks by a combined relevance score.
Prerequisites
- Python 3.10 or higher
- requests library installed
- A Scavio API key
- Topics or keywords to aggregate news for
Walkthrough
Step 1: Query all three sources
Fetch Google News results, Reddit posts, and YouTube videos for the same topic using the Scavio API.
from concurrent.futures import ThreadPoolExecutor
def fetch_google_news(topic: str) -> list[dict]:
    """Fetch Google News coverage of *topic* via the Scavio search endpoint.

    Returns the "news_results" list when the response contains one,
    otherwise falls back to "organic_results"; an empty list if neither
    key is present. Raises requests.HTTPError on non-2xx responses.
    """
    r = requests.post(
        ENDPOINT,
        headers={"x-api-key": API_KEY},
        json={"query": f"{topic} news", "country_code": "us"},
    )
    r.raise_for_status()
    # Parse the body once; the original re-parsed it for the fallback lookup.
    payload = r.json()
    return payload.get("news_results", payload.get("organic_results", []))
def fetch_reddit(topic: str) -> list[dict]:
    """Fetch Reddit posts discussing *topic* via the Scavio search endpoint.

    Returns the nested "data.posts" list, or an empty list when absent.
    Raises requests.HTTPError on non-2xx responses.
    """
    body = {"platform": "reddit", "query": topic}
    response = requests.post(ENDPOINT, headers={"x-api-key": API_KEY}, json=body)
    response.raise_for_status()
    return response.json().get("data", {}).get("posts", [])
def fetch_youtube(topic: str) -> list[dict]:
r = requests.post(ENDPOINT, headers={"x-api-key": API_KEY},
json={"platform": "youtube", "query": topic})
r.raise_for_status()
return r.json().get("videos", [])Step 2: Normalize results into a common format
Transform results from each platform into a uniform structure with title, url, source, and snippet.
def normalize_google(item: dict) -> dict:
    """Map one Google News result onto the shared feed schema."""
    return {
        "title": item.get("title"),
        "url": item.get("link"),
        "source": "google",
        "snippet": item.get("snippet", ""),
        "date": item.get("date"),
    }
def normalize_reddit(post: dict) -> dict:
    """Map one Reddit post onto the shared feed schema.

    The snippet is the subreddit name in "r/<name>" form rather than body
    text, since post bodies are not returned by the search endpoint.
    """
    subreddit = post.get("subreddit", "")
    return {
        "title": post.get("title"),
        "url": post.get("url"),
        "source": "reddit",
        "snippet": f"r/{subreddit}",
        "date": post.get("timestamp"),
    }
def normalize_youtube(video: dict) -> dict:
return {"title": video.get("title"), "url": video.get("url"), "source": "youtube", "snippet": video.get("description", "")[:100], "date": video.get("published_at")}Step 3: Deduplicate by URL
Remove duplicate entries that appear across sources using URL as the deduplication key.
def deduplicate(items: list[dict]) -> list[dict]:
seen = {}
for item in items:
url = item.get("url", "")
if url and url not in seen:
seen[url] = item
return list(seen.values())Step 4: Output the aggregated feed
Combine the per-source results into one deduplicated feed and return it; the full example below prints the feed grouped by source for easy consumption.
def aggregate(topic: str) -> list[dict]:
with ThreadPoolExecutor(max_workers=3) as ex:
g = ex.submit(fetch_google_news, topic)
r = ex.submit(fetch_reddit, topic)
y = ex.submit(fetch_youtube, topic)
items = [normalize_google(i) for i in g.result()[:5]]
items += [normalize_reddit(i) for i in r.result()[:5]]
items += [normalize_youtube(i) for i in y.result()[:5]]
return deduplicate(items)Python Example
import os
import requests
from concurrent.futures import ThreadPoolExecutor
# Scavio API key, read from the environment; the placeholder is only a
# reminder to set SCAVIO_API_KEY — requests made with it will be rejected.
API_KEY = os.environ.get("SCAVIO_API_KEY", "your_scavio_api_key")
# Single search endpoint shared by all three platform queries.
ENDPOINT = "https://api.scavio.dev/api/v1/search"
def fetch(body: dict) -> dict:
    """POST *body* to the Scavio search endpoint and return the parsed JSON.

    Raises requests.HTTPError on non-2xx responses.
    """
    response = requests.post(ENDPOINT, json=body, headers={"x-api-key": API_KEY})
    response.raise_for_status()
    return response.json()
def aggregate(topic: str) -> list[dict]:
    """Fetch Google News, Reddit, and YouTube results for *topic* in parallel.

    Returns up to 5 entries per platform as {"src", "title", "url"} dicts,
    always in source order: google, reddit, youtube.
    """
    with ThreadPoolExecutor(max_workers=3) as pool:
        google_f = pool.submit(fetch, {"query": f"{topic} news", "country_code": "us"})
        reddit_f = pool.submit(fetch, {"platform": "reddit", "query": topic})
        youtube_f = pool.submit(fetch, {"platform": "youtube", "query": topic})
    google = google_f.result()
    # Google responses use "news_results" or fall back to "organic_results".
    news = google.get("news_results") or google.get("organic_results", [])
    posts = reddit_f.result().get("data", {}).get("posts", [])
    videos = youtube_f.result().get("videos", [])
    feed: list[dict] = []
    feed.extend({"src": "google", "title": i.get("title"), "url": i.get("link")} for i in news[:5])
    feed.extend({"src": "reddit", "title": p.get("title"), "url": p.get("url")} for p in posts[:5])
    feed.extend({"src": "youtube", "title": v.get("title"), "url": v.get("url")} for v in videos[:5])
    return feed
if __name__ == "__main__":
for item in aggregate("AI agents 2026"):
print(f"[{item['src']:>7}] {item['title'][:60]}")JavaScript Example
// Scavio API key from the environment, with a placeholder fallback that
// only serves as a reminder to set SCAVIO_API_KEY.
const API_KEY = process.env.SCAVIO_API_KEY || "your_scavio_api_key";
// Single search endpoint shared by all three platform queries.
const ENDPOINT = "https://api.scavio.dev/api/v1/search";
// POST the query body to the Scavio search endpoint and return parsed JSON.
async function call(body) {
  const res = await fetch(ENDPOINT, {
    method: "POST",
    headers: { "x-api-key": API_KEY, "Content-Type": "application/json" },
    body: JSON.stringify(body)
  });
  // Mirror the Python example's raise_for_status(): fetch does NOT reject
  // on HTTP errors, so without this a 4xx/5xx error page would be parsed
  // (or fail to parse) as JSON, masking the real failure.
  if (!res.ok) {
    throw new Error(`Scavio request failed: ${res.status} ${res.statusText}`);
  }
  return res.json();
}
// Query all three platforms concurrently and return up to 5 titles per
// source, tagged with the source name, in google/reddit/youtube order.
async function aggregate(topic) {
  const bodies = [
    { query: `${topic} news`, country_code: "us" },
    { platform: "reddit", query: topic },
    { platform: "youtube", query: topic }
  ];
  const [g, r, y] = await Promise.all(bodies.map(call));
  const news = (g.news_results || g.organic_results || []).slice(0, 5);
  const posts = (r.data?.posts || []).slice(0, 5);
  const videos = (y.videos || []).slice(0, 5);
  return [
    ...news.map(i => ({ src: "google", title: i.title })),
    ...posts.map(p => ({ src: "reddit", title: p.title })),
    ...videos.map(v => ({ src: "youtube", title: v.title }))
  ];
}
aggregate("AI agents 2026").then(items => items.forEach(i => console.log(`[${i.src}] ${i.title}`))).catch(console.error);Expected Output
[ google] OpenAI Launches Agent Building Platform for Enterprise
[ google] Anthropic Expands Claude Agent Capabilities
[ reddit] Has anyone deployed AI agents in production yet?
[ reddit] Best frameworks for building AI agents in 2026
[youtube] I Built an AI Agent That Runs My Business
[youtube] AI Agents Explained - Complete 2026 Guide