Add Reddit grounding to an AI agent by identifying queries that benefit from community experience data, searching Reddit through a search API, parsing thread titles and comments, and injecting the community context into the agent's prompt. LLMs trained on web data already contain some Reddit knowledge, but it is outdated and generic. Live Reddit search surfaces current opinions, real user experiences, and community consensus that no training dataset can match. This is particularly valuable for product recommendations, troubleshooting, and "has anyone tried X" questions.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
- An existing AI agent to add grounding to
Walkthrough
Step 1: Identify queries needing community data
Build a classifier that detects when a user query would benefit from Reddit community data versus general web search.
import os, requests

# Scavio API key read from the environment.
# NOTE: raises KeyError at import time if SCAVIO_API_KEY is unset.
API_KEY = os.environ['SCAVIO_API_KEY']
# Phrases that strongly suggest the user wants first-hand community
# opinions (Reddit) rather than general web/documentation results.
REDDIT_SIGNALS = [
    'has anyone', 'anyone tried', 'experience with', 'thoughts on',
    'recommendation', 'recommend', 'which is better', 'worth it',
    'honest review', 'real experience', 'actually use', 'daily driver',
    'regret buying', 'switched from', 'alternative to',
]

def needs_reddit(query: str) -> bool:
    """Return True when `query` would benefit from Reddit community data.

    Multi-word signal phrases are matched as substrings; the
    product-recommendation heuristic ("best ... for ..." / "best ...?")
    matches whole words only.
    """
    q_lower = query.lower()
    if any(signal in q_lower for signal in REDDIT_SIGNALS):
        return True
    # Product recommendation pattern. Match 'best'/'for' as whole words so
    # substrings such as 'asbestos' or 'form' do not trigger false
    # positives (the plain `'best' in q_lower` check did).
    tokens = q_lower.replace('?', ' ').split()
    if 'best' in tokens and ('for' in tokens or '?' in q_lower):
        return True
    return False

print(needs_reddit('Has anyone tried the new Pixel 9?')) # True
print(needs_reddit('Python list comprehension syntax')) # False
print(needs_reddit('Best CRM for small teams?')) # True

Step 2: Search Reddit via API
Search Reddit through Scavio to find relevant threads and discussions.
def search_reddit(query: str, limit: int = 5) -> list:
    """Query the Scavio search API for Reddit threads matching `query`.

    Returns at most `limit` dicts with 'title', 'url', 'snippet', and
    'subreddit' keys. Raises requests.HTTPError on a failed API call.
    """
    response = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY},
        json={'platform': 'reddit', 'query': query},
        timeout=15,
    )
    response.raise_for_status()
    organic = response.json().get('organic_results', [])
    threads = []
    for result in organic[:limit]:
        threads.append({
            'title': result.get('title', ''),
            'url': result.get('link', ''),
            'snippet': result.get('snippet', ''),
            # Fall back to 'source' when the API omits 'subreddit'.
            'subreddit': result.get('subreddit', result.get('source', '')),
        })
    return threads
threads = search_reddit('best CRM for small business')
for t in threads:
    print(f"r/{t['subreddit']}: {t['title'][:60]}")

Step 3: Parse thread context
Extract and format Reddit thread data into a concise context block for the agent.
def format_reddit_context(threads: list) -> str:
    """Render up to three Reddit threads as a context block for the agent.

    Returns '' when `threads` is empty; otherwise a newline-joined block
    headed by a banner and ending with an opinion disclaimer. Snippets are
    truncated to 200 characters.
    """
    if not threads:
        return ''
    lines = ['COMMUNITY CONTEXT (from Reddit):']
    for thread in threads[:3]:
        lines.append(f"\nThread: {thread['title']}")
        subreddit = thread.get('subreddit')
        if subreddit:
            lines.append(f"Subreddit: {subreddit}")
        snippet = thread.get('snippet')
        if snippet:
            lines.append(f"Summary: {snippet[:200]}")
    lines.append('\nNote: These are community opinions, not verified facts.')
    return '\n'.join(lines)
context = format_reddit_context(threads)
print(context)

Step 4: Inject context into agent prompt
Add the Reddit context to the agent's system prompt when the query matches community data patterns.
def grounded_prompt(query: str, system_prompt: str = 'You are a helpful assistant.') -> str:
    """Build the final agent prompt, adding Reddit context when the query
    matches community-data patterns (see needs_reddit).

    Sections are joined with blank lines; the user query always comes last.
    """
    sections = [system_prompt]
    if needs_reddit(query):
        community = format_reddit_context(search_reddit(query))
        if community:
            sections.append(community)
            sections.append('Use the community context above to inform your response. Cite specific threads when relevant.')
    sections.append(f'\nUser: {query}')
    return '\n\n'.join(sections)
prompt = grounded_prompt('Has anyone tried the Framework laptop? Is it worth it?')
print(prompt[:500])

Step 5: Test grounding quality
Compare agent responses with and without Reddit grounding to measure the quality improvement.
def test_grounding(queries: list):
    """Print, for each query, whether it triggers Reddit grounding.

    Grounded queries perform a live search and show up to two thread
    titles; others note that general web search suffices. One blank line
    separates each query's report.
    """
    for q in queries:
        # Hoisted: this line was duplicated in both branches.
        print(f'Query: {q}')
        if needs_reddit(q):
            threads = search_reddit(q)
            print(f' Reddit grounding: YES ({len(threads)} threads found)')
            for t in threads[:2]:
                print(f' - {t["title"][:60]}')
        else:
            print(' Reddit grounding: NO (general web search sufficient)')
        print()
# Sample queries spanning both categories: community-opinion questions
# (should trigger Reddit grounding) and factual/syntax questions (should not).
test_queries = [
    'Has anyone tried Cursor IDE?',
    'Python dictionary methods',
    'Best standing desk recommendation?',
    'Experience with Hetzner hosting?',
    'SQL JOIN syntax',
]
test_grounding(test_queries)

Python Example
import requests, os

# Shared request headers; SCAVIO_API_KEY must be present in the environment.
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}

def reddit_ground(query):
    """Return a compact 'title: snippet' summary of the top 3 Reddit threads.

    Raises requests.HTTPError on a failed API call and a timeout error if
    the API does not answer within 15 seconds (the original call had no
    timeout and could hang indefinitely).
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=H,
        json={'platform': 'reddit', 'query': query},
        timeout=15,  # consistent with the Step-2 example; never hang forever
    )
    resp.raise_for_status()  # surface HTTP errors instead of mis-parsing the body
    threads = resp.json().get('organic_results', [])[:3]
    return '\n'.join(f"{t.get('title', '')}: {t.get('snippet', '')[:100]}" for t in threads)
print(reddit_ground('best CRM for small business'))

JavaScript Example
// Shared request headers for the Scavio search API.
const H = {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'};

// Fetch Reddit threads for `query` and return a compact
// "title: snippet" summary, one thread per line (at most three).
async function redditGround(query) {
  const payload = {platform: 'reddit', query};
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: H,
    body: JSON.stringify(payload),
  });
  const body = await resp.json();
  const threads = body.organic_results || [];
  const lines = [];
  for (const t of threads.slice(0, 3)) {
    lines.push(`${t.title}: ${(t.snippet || '').slice(0, 100)}`);
  }
  return lines.join('\n');
}
redditGround('best CRM for small business').then(console.log);

Expected Output
An AI agent that detects community-data queries, searches Reddit for relevant threads, and injects community context into its responses for more authentic recommendations.