A single enrichment source misses 20-40% of leads due to incomplete databases. A waterfall pipeline tries the primary source first, falls back to SERP search for missing data, and validates emails at the end. This tutorial builds a multi-source pipeline where SERP enrichment via Scavio fills gaps left by your primary provider at $0.005 per lookup.
Prerequisites
- Python 3.8+
- requests library
- A Scavio API key from scavio.dev
- A CSV or list of leads to enrich
Walkthrough
Step 1: Define the lead enrichment waterfall
Set up the pipeline stages: primary source, SERP fallback, validation.
import os, requests, json, csv
# Scavio API key; os.environ[...] raises KeyError here if SCAVIO_API_KEY is unset.
API_KEY = os.environ['SCAVIO_API_KEY']
# Request headers passed to the requests.post calls against the Scavio search endpoint.
SH = {'x-api-key': API_KEY, 'Content-Type': 'application/json'}
def primary_enrich(lead):
    """Stand-in for the primary enrichment provider (Apollo, Clearbit, etc.).

    Copies ``company`` and ``domain`` from *lead* (``None`` when absent) and
    leaves ``industry``, ``description`` and ``employee_count`` as ``None``.
    """
    # Replace with your actual primary provider
    enriched = {field: lead.get(field) for field in ('company', 'domain')}
    enriched.update(dict.fromkeys(('industry', 'description', 'employee_count')))
    return enriched
def serp_enrich(company_name, timeout=10.0):
    """Fallback: search Google (via the Scavio search API) for company info.

    Args:
        company_name: Company name; the query sent is ``'<name> company'``.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block the whole pipeline indefinitely.

    Returns:
        A dict with ``'description'`` (the longest snippet among the first
        three organic results, truncated to 200 characters, or ``''`` when
        there is none) and ``'industry_hints'`` (always an empty list here).
    """
    response = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=SH,
        json={'query': f'{company_name} company', 'country_code': 'us'},
        timeout=timeout,
    )
    data = response.json()
    # `or []` / `or ''` tolerate explicit nulls in the JSON payload.
    top_results = (data.get('organic_results') or [])[:3]
    snippets = [result.get('snippet') or '' for result in top_results]
    # Compare full snippet lengths and truncate only afterwards; comparing
    # against an already-truncated description lets a shorter snippet
    # replace a longer one. On ties max() keeps the earliest snippet.
    longest = max(snippets, key=len, default='')
    return {'description': longest[:200], 'industry_hints': []}
print('Pipeline configured: Primary -> SERP fallback -> Validation')
Step 2: Build the waterfall logic
Try primary first, fill gaps with SERP search, track coverage.
def enrich_lead(lead, timeout=10.0):
    """Run one lead through the waterfall: primary source, SERP, then Reddit.

    Args:
        lead: Lead dict; ``'company'`` drives both fallback lookups, which
            are skipped when it is missing or empty.
        timeout: Seconds to wait for the Reddit search response, so a
            stalled connection cannot block the pipeline indefinitely.

    Returns:
        The primary-source dict extended with ``'sources'`` (stages that
        ran), ``'cost'`` (0.005 per search call made) and, when a company
        name is present, ``'reddit_mentions'``. ``'description'`` is
        replaced by the SERP result only when the primary source left it
        empty.
    """
    result = primary_enrich(lead)
    cost = 0.0
    sources = ['primary']
    company = lead.get('company')

    # Fill gaps with SERP search
    if company and not result.get('description'):
        serp = serp_enrich(company)
        result['description'] = serp.get('description', '')
        cost += 0.005
        sources.append('serp')

    # Check Reddit for additional signals
    if company:
        response = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers=SH,
            json={'query': f'{company} review',
                  'platform': 'reddit', 'country_code': 'us'},
            timeout=timeout,
        )
        reddit = response.json()
        # `or []` tolerates an explicit null in the JSON payload.
        result['reddit_mentions'] = len(reddit.get('organic_results') or [])
        cost += 0.005
        sources.append('reddit')

    result['sources'] = sources
    result['cost'] = cost
    return result
# Sample leads used by the demo runs in this tutorial.
leads = [
    {'company': 'Acme Corp', 'domain': 'acme.com', 'email': 'info@acme.com'},
    {'company': 'Beta Labs', 'domain': 'betalabs.io', 'email': 'hello@betalabs.io'},
]

# Enrich each sample lead and print a short per-lead summary.
for lead in leads:
    enriched = enrich_lead(lead)
    source_count = len(enriched["sources"])
    print(f'{lead["company"]}: {source_count} sources, ${enriched["cost"]:.3f}')
    print(f' Description: {enriched["description"][:60]}...')
    print(f' Reddit mentions: {enriched.get("reddit_mentions", 0)}')
Step 3: Validate and score leads
Score leads based on enrichment completeness and signal strength.
def score_lead(enriched):
    """Score an enriched lead from 0 to 100 by completeness and signal strength.

    Points: description 25, domain 20, at least one Reddit mention 15,
    industry 20, employee count 20. A field counts only when it is truthy.

    Args:
        enriched: Enriched lead dict; any of the scored keys may be missing
            or ``None``.

    Returns:
        The integer score.
    """
    score = 0
    if enriched.get('description'):
        score += 25
    if enriched.get('domain'):
        score += 20
    # `or 0` keeps an explicit None from reaching the `>` comparison,
    # which would raise TypeError.
    if (enriched.get('reddit_mentions') or 0) > 0:
        score += 15
    if enriched.get('industry'):
        score += 20
    if enriched.get('employee_count'):
        score += 20
    return score
def enrich_batch(leads):
    """Enrich and score every lead, print a ranked summary, and return the results.

    Args:
        leads: Iterable of lead dicts accepted by ``enrich_lead``.

    Returns:
        The enriched lead dicts, each with an added ``'score'`` key, sorted
        from highest to lowest score.
    """
    results = []
    for lead in leads:
        enriched = enrich_lead(lead)
        enriched['score'] = score_lead(enriched)
        results.append(enriched)

    total_cost = sum(item['cost'] for item in results)
    results.sort(key=lambda item: item['score'], reverse=True)

    print(f'\nEnriched {len(results)} leads. Total cost: ${total_cost:.3f}')
    for r in results:
        # A lead without a company name carries company=None; applying the
        # ':20' format spec to None raises TypeError, so substitute a label.
        company = r.get('company') or '(unknown)'
        print(f' {company:20} | score: {r["score"]:3} | sources: {", ".join(r["sources"])}')
    return results
enrich_batch(leads)
Step 4: Export enriched leads
Save enriched leads with scores to JSON for your CRM.
def export_leads(enriched_leads, filename='enriched_leads.json'):
    """Write enriched leads to a JSON file and print summary statistics.

    Args:
        enriched_leads: Iterable of enriched lead dicts. Missing keys fall
            back to defaults (0 for numbers, ``[]`` for sources).
        filename: Path of the JSON file to write.

    An empty batch writes ``[]`` and reports zero for the averages instead
    of raising ZeroDivisionError.
    """
    export = [
        {
            'company': lead.get('company'),
            'domain': lead.get('domain'),
            # The description can be None when no source filled it in;
            # slicing None would raise TypeError.
            'description': (lead.get('description') or '')[:200],
            'reddit_mentions': lead.get('reddit_mentions', 0),
            'enrichment_score': lead.get('score', 0),
            'sources': lead.get('sources', []),
            'cost': lead.get('cost', 0),
        }
        for lead in enriched_leads
    ]
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(export, f, indent=2)

    lead_count = len(export)
    total_cost = sum(item['cost'] for item in export)
    avg_score = sum(item['enrichment_score'] for item in export) / lead_count if lead_count else 0
    cost_per_lead = total_cost / lead_count if lead_count else 0

    print(f'\nExported {lead_count} leads to {filename}')
    print(f'Average enrichment score: {avg_score:.0f}/100')
    print(f'Total enrichment cost: ${total_cost:.3f}')
    print(f'Cost per lead: ${cost_per_lead:.4f}')
export_leads(enrich_batch(leads))
Python Example
import os, requests
# Request headers for the Scavio search endpoint; raises KeyError if
# SCAVIO_API_KEY is not set in the environment.
SH = {'x-api-key': os.environ['SCAVIO_API_KEY'], 'Content-Type': 'application/json'}


def enrich(company, timeout=10.0):
    """Print a one-line enrichment summary for *company*.

    Makes two search calls, a general query and a Reddit query, then prints
    the first organic snippet (up to 80 characters, ``N/A`` when there is
    none) and the number of Reddit results.

    Args:
        company: Company name to look up.
        timeout: Seconds to wait for each HTTP response, so a stalled
            connection cannot hang the script indefinitely.
    """
    def search(payload):
        # Both lookups hit the same endpoint and differ only in the JSON body.
        return requests.post('https://api.scavio.dev/api/v1/search',
                             headers=SH, json=payload, timeout=timeout).json()

    g = search({'query': f'{company} company', 'country_code': 'us'})
    r = search({'query': f'{company} review', 'platform': 'reddit', 'country_code': 'us'})

    # `or` fallbacks cover missing keys, empty lists and explicit nulls.
    first_result = (g.get('organic_results') or [{}])[0]
    desc = (first_result.get('snippet') or 'N/A')[:80]
    mentions = len(r.get('organic_results') or [])
    print(f'{company}: "{desc}" | Reddit: {mentions} mentions | Cost: $0.010')
enrich('Stripe')
JavaScript Example
// Request headers for the Scavio search endpoint; the key is read from the environment.
const SH = { 'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json' };

/**
 * Log a one-line enrichment summary for a company.
 *
 * Issues a general search and a Reddit search concurrently, then logs the
 * first organic snippet (up to 80 characters, "N/A" when there is none)
 * and the number of Reddit results.
 *
 * @param {string} company - Company name to look up.
 * @returns {Promise<void>}
 */
async function enrich(company) {
  // Both lookups hit the same endpoint and differ only in the JSON body.
  const search = (payload) =>
    fetch('https://api.scavio.dev/api/v1/search', {
      method: 'POST',
      headers: SH,
      body: JSON.stringify(payload),
    }).then((res) => res.json());

  const [google, reddit] = await Promise.all([
    search({ query: `${company} company`, country_code: 'us' }),
    search({ query: `${company} review`, platform: 'reddit', country_code: 'us' }),
  ]);

  const firstResult = (google.organic_results || [{}])[0];
  const desc = firstResult?.snippet?.slice(0, 80) || 'N/A';
  const mentionCount = (reddit.organic_results || []).length;
  console.log(`${company}: "${desc}" | Reddit: ${mentionCount} mentions`);
}
enrich('Stripe').catch(console.error);
Expected Output
Pipeline configured: Primary -> SERP fallback -> Validation
Acme Corp: 3 sources, $0.010
Description: Acme Corp provides enterprise software solutions for supply chai...
Reddit mentions: 3
Beta Labs: 3 sources, $0.010
Description: Beta Labs builds developer tools for API testing and monitoring...
Reddit mentions: 1
Enriched 2 leads. Total cost: $0.020
Acme Corp | score: 60 | sources: primary, serp, reddit
Beta Labs | score: 60 | sources: primary, serp, reddit
Exported 2 leads to enriched_leads.json
Average enrichment score: 60/100
Total enrichment cost: $0.020
Cost per lead: $0.0100