Reddit discussions contain explicit buying intent that most lead gen tools miss. When someone posts 'looking for an API to track Amazon prices' or 'best alternative to Tavily', they are telling you what they need. This tutorial builds a pipeline that monitors Reddit for intent-rich discussions, scores purchase readiness, and outputs qualified leads. Monitoring 20 keywords daily costs $0.10/day.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
- Intent keywords related to your product
Walkthrough
Step 1: Define intent keywords and scoring rules
Create keyword groups signaling different buying intent levels.
import os, requests, json
from datetime import datetime
# The Scavio key is read from the environment; if SCAVIO_API_KEY is unset this
# line raises KeyError as soon as the script starts.
API_KEY = os.environ['SCAVIO_API_KEY']
# Headers passed with the search request.
H = {'x-api-key': API_KEY, 'Content-Type': 'application/json'}

# Phrases treated as strong buying intent when found in a discussion.
HIGH_INTENT = [
    'looking for API',
    'best alternative to',
    'recommend a',
    'switching from',
    'need an API for',
]
# Phrases treated as weaker, exploratory intent.
MED_INTENT = [
    'how to track',
    'how to monitor',
    'building a tool',
    'automate',
    'API for',
]
# Reddit searches issued on every monitoring pass.
SEARCH_QUERIES = [
    'looking for serp api',
    'best tavily alternative',
    'api to track amazon prices',
    'tiktok data api recommendation',
]


# Step 2: Search Reddit and score intent
Query Reddit through Scavio and score each discussion by intent signals.
def search_reddit(query, limit=10, timeout=30):
    """Search Reddit through the Scavio search endpoint.

    Args:
        query: Search phrase to send to the API.
        limit: Maximum number of results to return (default 10).
        timeout: Seconds to wait for the HTTP response (default 30).

    Returns:
        A list of at most ``limit`` items taken from the response's
        ``organic_results`` field; empty when that field is missing or null.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=H,
        json={'query': query, 'platform': 'reddit', 'country_code': 'us'},
        # requests waits indefinitely unless a timeout is given, which would
        # leave a scheduled run hanging on a stalled connection.
        timeout=timeout,
    )
    # Fail loudly on auth/quota errors instead of reporting "no results".
    resp.raise_for_status()
    results = resp.json().get('organic_results') or []
    return results[:limit]
def score_intent(title, snippet, high_intent=None, med_intent=None):
    """Score a discussion by the intent phrases found in its title and snippet.

    Matching is a case-insensitive substring test. Each high-intent phrase
    found adds 3 points and each medium-intent phrase adds 1.

    Args:
        title: Discussion title; ``None`` is treated as an empty string.
        snippet: Discussion snippet; ``None`` is treated as an empty string.
        high_intent: Phrases worth 3 points each. Defaults to ``HIGH_INTENT``.
        med_intent: Phrases worth 1 point each. Defaults to ``MED_INTENT``.

    Returns:
        The integer score; 0 when no phrase matches.
    """
    high = HIGH_INTENT if high_intent is None else high_intent
    med = MED_INTENT if med_intent is None else med_intent
    # API results may carry null fields; coerce them so concatenation is safe.
    text = f"{title or ''} {snippet or ''}".lower()
    score = 3 * sum(kw.lower() in text for kw in high)
    score += sum(kw.lower() in text for kw in med)
    return score


# Step 3: Run the pipeline and rank leads
Search all queries, score discussions, and output ranked leads.
def run_monitor():
    """Run every configured search, score the results, and print ranked leads.

    Each query in ``SEARCH_QUERIES`` is searched via ``search_reddit`` and every
    result is scored with ``score_intent``. Results scoring above zero become
    leads; results without a link are skipped.

    Returns:
        A list of lead dicts with keys ``title``, ``url``, ``snippet``,
        ``score`` and ``query``, sorted by descending score.
    """
    leads = []
    for query in SEARCH_QUERIES:
        for result in search_reddit(query):
            # Results may omit fields or carry nulls; normalise before use so
            # a sparse result cannot raise KeyError/TypeError mid-run.
            title = result.get('title') or ''
            snippet = result.get('snippet') or ''
            url = result.get('link')
            if not url:
                # A lead without a URL cannot be followed up or deduplicated.
                continue
            score = score_intent(title, snippet)
            if score > 0:
                leads.append({
                    'title': title,
                    'url': url,
                    'snippet': snippet,
                    'score': score,
                    'query': query,
                })
    leads.sort(key=lambda lead: lead['score'], reverse=True)
    print(f'Found {len(leads)} intent signals from {len(SEARCH_QUERIES)} queries')
    # Cost estimate: 0.005 per query issued.
    print(f'Cost: ${len(SEARCH_QUERIES) * 0.005:.3f}\n')
    for lead in leads[:10]:
        print(f'[Score: {lead["score"]}] {lead["title"][:70]}')
        print(f' {lead["url"]}\n')
    return leads
leads = run_monitor()
Step 4: Deduplicate and export
Save leads to JSON with URL-based deduplication for daily monitoring.
def save_leads(leads, path='intent_leads.json'):
    """Merge new leads into the JSON file at ``path``, deduplicating by URL.

    A lead is added only if its ``url`` is neither already stored in the file
    nor seen earlier in ``leads``; when the same URL appears more than once in
    ``leads``, the first occurrence is kept.

    Args:
        leads: Iterable of lead dicts, each with a ``url`` key.
        path: JSON file holding the accumulated leads; created if missing.
    """
    try:
        with open(path) as f:
            existing = json.load(f)
    except FileNotFoundError:
        # First run: nothing stored yet.
        existing = []

    seen = {lead['url'] for lead in existing}
    new = []
    for lead in leads:
        url = lead['url']
        if url in seen:
            continue
        # Record the URL immediately so a repeat later in this batch (for
        # example the same thread returned by two queries) is not stored twice.
        seen.add(url)
        new.append(lead)

    existing.extend(new)
    with open(path, 'w') as f:
        json.dump(existing, f, indent=2)
    print(f'{len(new)} new leads ({len(existing)} total)')
save_leads(leads)
# crontab: 0 8 * * * python intent_monitor.py >> monitor.log 2>&1
Python Example
import os, requests
# Scavio key from the environment; a missing variable raises KeyError here.
API_KEY = os.environ['SCAVIO_API_KEY']
# Headers passed with the search request.
H = {
    'x-api-key': API_KEY,
    'Content-Type': 'application/json',
}
# Reddit searches to run.
QUERIES = [
    'looking for serp api',
    'best tavily alternative',
]
# Lower-case phrases counted as buying intent.
HIGH = [
    'looking for',
    'best alternative',
    'recommend a',
]
def monitor():
    """Search Reddit for each query, score results, and print the top leads.

    For every entry in ``QUERIES`` the first five organic results are scored
    at 2 points per ``HIGH`` phrase found in the lower-cased title and snippet.
    The five best-scoring leads and the estimated cost are printed.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If a response does not arrive within 30 seconds.
    """
    leads = []
    for q in QUERIES:
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers=H,
            json={'query': q, 'platform': 'reddit', 'country_code': 'us'},
            # Without a timeout requests can block forever on a stalled socket.
            timeout=30,
        )
        # Surface auth/quota failures instead of treating them as "no results".
        resp.raise_for_status()
        data = resp.json()
        for r in (data.get('organic_results') or [])[:5]:
            # Fields may be missing or null; normalise before building text.
            title = r.get('title') or ''
            snippet = r.get('snippet') or ''
            url = r.get('link')
            text = f"{title} {snippet}".lower()
            score = sum(2 for kw in HIGH if kw in text)
            if score > 0 and url:
                leads.append({'title': title, 'url': url, 'score': score})
    leads.sort(key=lambda lead: lead['score'], reverse=True)
    for lead in leads[:5]:
        print(f'[{lead["score"]}] {lead["title"][:60]}')
    # Cost estimate: 0.005 per query issued.
    print(f'Cost: ${len(QUERIES) * 0.005:.3f}')
monitor()
JavaScript Example
// Scavio API key, taken from the environment (undefined when not set).
const { SCAVIO_API_KEY: API_KEY } = process.env;
// Headers passed with the search request.
const H = {
  'x-api-key': API_KEY,
  'Content-Type': 'application/json',
};
// Reddit searches to run.
const QUERIES = [
  'looking for serp api',
  'best tavily alternative',
];
// Lower-case phrases counted as buying intent.
const HIGH = [
  'looking for',
  'best alternative',
  'recommend a',
];
/**
 * Search Reddit for each query, score the first five results, and log the
 * five highest-scoring leads.
 *
 * A result earns 2 points per HIGH phrase found in its lower-cased title and
 * snippet; results scoring zero are dropped.
 *
 * @returns {Promise<void>} Resolves after the leads have been logged.
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function monitor() {
  const leads = [];
  for (const q of QUERIES) {
    const res = await fetch('https://api.scavio.dev/api/v1/search', {
      method: 'POST',
      headers: H,
      body: JSON.stringify({ query: q, platform: 'reddit', country_code: 'us' }),
    });
    // fetch resolves even on 4xx/5xx responses, so check the status explicitly
    // rather than treating an error payload as "no results".
    if (!res.ok) {
      throw new Error(`Scavio search failed for "${q}": HTTP ${res.status}`);
    }
    const data = await res.json();
    for (const r of (data.organic_results || []).slice(0, 5)) {
      // Title may be absent; default it so scoring and the later slice() are safe.
      const title = r.title || '';
      const text = `${title} ${r.snippet || ''}`.toLowerCase();
      const score = HIGH.filter(kw => text.includes(kw)).length * 2;
      if (score > 0) leads.push({ title, url: r.link, score });
    }
  }
  leads
    .sort((a, b) => b.score - a.score)
    .slice(0, 5)
    .forEach(lead => console.log(`[${lead.score}] ${lead.title.slice(0, 60)}`));
}
monitor().catch(console.error);
Expected Output
Found 12 intent signals from 4 queries
Cost: $0.020
[Score: 6] Looking for a SERP API that doesn't cost a fortune - recommendations?
https://reddit.com/r/webdev/comments/...
[Score: 4] Best Tavily alternative after the Nebius acquisition?
https://reddit.com/r/LangChain/comments/...
3 new leads (15 total)