Lead enrichment adds context to raw contact lists by filling in company size, industry, tech stack, recent news, and funding data. Traditional enrichment services like Apollo charge $49+/user/month for pre-built databases that go stale. Using a search API, you can enrich leads with the freshest publicly available data for $0.005 per lookup. This tutorial builds a Python enrichment pipeline that takes a CSV of leads, searches for each company, and outputs an enriched spreadsheet ready for outbound sales.
Prerequisites
- Python 3.9+ installed
- requests library installed
- A Scavio API key from scavio.dev
- A CSV file with company names or domains to enrich
Walkthrough
Step 1: Load your lead list from CSV
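Read a CSV that has at minimum a company column; a domain column is optional and, when present, narrows the overview search to that site. Rows without a company name are skipped. The script will search for each company and add enrichment columns.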
import csv
def load_leads(csv_path: str, encoding: str = 'utf-8-sig') -> list:
    """Load leads from a CSV file, one dict per row keyed by the header.

    Args:
        csv_path: Path to a CSV file whose first row is the header.
        encoding: Text encoding of the file. The default ``utf-8-sig``
            reads plain UTF-8 and also strips a leading byte-order mark,
            which would otherwise become part of the first column name.

    Returns:
        A list of row dicts in file order.
    """
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields that contain embedded newlines are parsed correctly.
    with open(csv_path, newline='', encoding=encoding) as f:
        leads = list(csv.DictReader(f))
    print(f'Loaded {len(leads)} leads from {csv_path}')
    return leads
# Expected CSV format:
# company,domain,contact_name,email
# Acme Corp,acme.com,John Doe,john@acme.com
# BetaTech,betatech.io,Jane Smith,jane@betatech.io
Step 2: Build the enrichment search function
For each lead, run two targeted searches: one for company overview and one for recent news. Parse the SERP snippets to extract key data points.
import requests, os
# Read the API key once at import time. os.environ[...] raises KeyError when
# SCAVIO_API_KEY is unset, so a missing key fails before any request is sent.
API_KEY = os.environ['SCAVIO_API_KEY']
def search(query: str) -> list:
    """Run one search query and return its organic results.

    Args:
        query: The search query string.

    Returns:
        The ``organic_results`` list from the JSON response, or an empty
        list when the response has no such key.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If the API does not respond within the timeout.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us'},
        # requests waits indefinitely by default; bound the wait so one
        # stalled call cannot hang the whole pipeline.
        timeout=30,
    )
    # Fail loudly on error statuses instead of letting an error payload be
    # read as "no results".
    resp.raise_for_status()
    return resp.json().get('organic_results', [])
def enrich_company(company: str, domain: str = '') -> dict:
    """Look up a company overview and recent news via two searches.

    Args:
        company: Company name used in both search queries.
        domain: Optional domain; when given, the overview search is
            restricted to it with a ``site:`` filter.

    Returns:
        A dict with keys ``description`` (first overview snippet, at most
        200 characters), ``website`` (link of the first overview result),
        ``recent_news`` (up to three news titles joined by ``' | '``) and
        ``news_count`` (number of news results returned).
    """
    # Company overview search
    overview_query = f'{company} company overview'
    if domain:
        overview_query += f' site:{domain}'
    overview = search(overview_query)
    # Use an empty dict when there are no results so the lookups below
    # fall through to their defaults.
    top = overview[0] if overview else {}

    # Recent news search
    news = search(f'{company} funding OR hiring OR launch 2026')
    # `or ''` guards against results whose title is present but null, which
    # would otherwise make str.join raise TypeError.
    recent_news = [r.get('title') or '' for r in news[:3]]

    return {
        'description': (top.get('snippet') or '')[:200],
        'website': top.get('link') or '',
        'recent_news': ' | '.join(recent_news),
        'news_count': len(news),
    }


# Step 3: Extract structured data from snippets
Parse search snippets to identify company size indicators, industry, and tech stack mentions. Simple keyword matching is effective for this.
import re
def _mentions(term: str, text: str) -> bool:
    """Return True if *term* occurs in *text* as a whole word or phrase."""
    return re.search(rf'\b{re.escape(term)}\b', text) is not None


def extract_signals(snippets: list) -> dict:
    """Extract size, industry, funding and tech-stack hints from snippets.

    Args:
        snippets: Search-result snippet strings. Empty or ``None`` entries
            are ignored.

    Returns:
        A dict with keys ``size`` (matched employee-count phrase or
        ``'unknown'``), ``industry`` (first matching industry keyword or
        ``'unknown'``), ``funding`` (matched "raised $..." phrase or
        ``'not found'``) and ``tech_stack`` (list of matched technology
        keywords, in the order of the keyword list). All matches are taken
        from the lowercased text.
    """
    text = ' '.join(s for s in snippets if s).lower()

    # Company size signals
    size_match = re.search(r'(\d+[,.]?\d*)\+?\s*employees', text)
    size = size_match.group(0) if size_match else 'unknown'

    # Industry detection. Keywords are matched on word boundaries so that
    # short ones such as 'ai' do not fire inside "email" or "available".
    industries = ['saas', 'fintech', 'healthcare', 'ecommerce', 'edtech',
                  'cybersecurity', 'ai', 'logistics', 'real estate',
                  'marketing']
    detected_industry = next(
        (ind for ind in industries if _mentions(ind, text)), 'unknown')

    # Funding signals: "$50m" / "$2b" attached to the number, or a spelled-out
    # "million" / "billion", optionally separated by whitespace.
    funding_match = re.search(
        r'raised\s+\$\d[\d,]*(?:\.\d+)?(?:\s*(?:million|billion)\b|[mb])',
        text)
    funding = funding_match.group(0) if funding_match else 'not found'

    # Tech stack mentions (whole words only, e.g. 'aws' must not match "laws")
    techs = ['react', 'python', 'aws', 'azure', 'kubernetes', 'shopify',
             'salesforce', 'hubspot', 'stripe', 'twilio']
    tech_stack = [t for t in techs if _mentions(t, text)]

    return {'size': size, 'industry': detected_industry,
            'funding': funding, 'tech_stack': tech_stack}


# Step 4: Process all leads with rate limiting
Enrich each lead with a delay between requests. Each lead uses three searches (two inside enrich_company plus one for signal extraction), so 200 leads cost $3.00 total.
import time
def enrich_all(leads: list) -> list:
    """Enrich every lead that has a company name.

    For each such lead this calls ``enrich_company`` (two searches) and runs
    one more search whose snippets feed ``extract_signals``, then merges the
    results into a copy of the lead. Leads with a missing, empty or
    whitespace-only ``company`` are skipped without issuing any search.

    Args:
        leads: Row dicts; ``company`` is required and ``domain`` is optional.

    Returns:
        A new list of enriched lead dicts, in input order.
    """
    enriched = []
    for i, lead in enumerate(leads):
        # `or ''` covers keys that are present but None; strip() keeps
        # blank cells from triggering searches with a meaningless query.
        company = (lead.get('company') or '').strip()
        domain = (lead.get('domain') or '').strip()
        if not company:
            continue

        data = enrich_company(company, domain)

        # Separate general search whose top snippets feed signal extraction
        results = search(f'{company} company')
        snippets = [r.get('snippet') or '' for r in results[:5]]
        signals = extract_signals(snippets)

        enriched.append({**lead, **data, **signals})

        if (i + 1) % 10 == 0:
            print(f' Enriched {i + 1}/{len(leads)}')
        # Pause between leads to stay under the API's rate limit
        time.sleep(0.3)
    return enriched
leads = load_leads('leads.csv')
enriched = enrich_all(leads)
print(f'Enriched {len(enriched)} leads')
Step 5: Save enriched leads to CSV
Write the enriched data back to a CSV. The output includes all original columns plus the new enrichment columns.
def save_enriched(leads: list, output_path: str) -> None:
    """Write enriched leads to a UTF-8 CSV file and print a cost summary.

    Args:
        leads: Enriched lead dicts. List values are flattened to
            comma-separated strings.
        output_path: Destination CSV path; an existing file is overwritten.

    Returns:
        None. When ``leads`` is empty a message is printed and no file is
        written.
    """
    if not leads:
        print('No leads to save')
        return

    # Build the header from every row (first-seen order), not just the first
    # one: DictWriter raises ValueError on a row with a key missing from
    # fieldnames.
    fieldnames = list(dict.fromkeys(k for lead in leads for k in lead))

    # Explicit UTF-8 so non-ASCII snippet text does not depend on the
    # platform's default encoding.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for lead in leads:
            # Convert lists to strings for CSV
            writer.writerow({
                k: ', '.join(map(str, v)) if isinstance(v, list) else v
                for k, v in lead.items()
            })

    credits_used = len(leads) * 3  # 3 searches per lead
    cost = credits_used * 0.005
    print(f'Saved {len(leads)} enriched leads to {output_path}')
    print(f'Credits used: {credits_used} (${cost:.2f})')
save_enriched(enriched, 'enriched_leads.csv')
Python Example
import os, csv, re, time, requests
# Read the API key once at import time. os.environ[...] raises KeyError when
# SCAVIO_API_KEY is unset, so a missing key fails before any request is sent.
API_KEY = os.environ['SCAVIO_API_KEY']
def search(query):
    """Run one search query and return its organic results.

    Returns the ``organic_results`` list from the JSON response, or an empty
    list when the response has no such key. Raises ``requests.HTTPError`` on
    a 4xx/5xx response and ``requests.Timeout`` if the API does not answer
    within the timeout.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us'},
        # requests waits indefinitely by default; bound the wait.
        timeout=30,
    )
    # Fail loudly on error statuses instead of letting an error payload be
    # read as "no results".
    resp.raise_for_status()
    return resp.json().get('organic_results', [])
def enrich(company, domain=''):
    """Return a short description and the top news headline for a company.

    Args:
        company: Company name used in both search queries.
        domain: Optional domain; when given, the overview search is
            restricted to it with a ``site:`` filter.

    Returns:
        A dict with ``description`` (first overview snippet, at most 150
        characters) and ``recent_news`` (title of the first news result).
        Either value is ``''`` when its search returns nothing.
    """
    overview_query = f'{company} company overview'
    if domain:
        # The parameter was previously accepted but ignored; honour it.
        overview_query += f' site:{domain}'
    overview = search(overview_query)
    news = search(f'{company} funding OR launch 2026')
    # `or ''` guards against fields that are present but null.
    return {
        'description': (overview[0].get('snippet') or '')[:150] if overview else '',
        'recent_news': (news[0].get('title') or '') if news else '',
    }
def main():
    """Enrich three sample companies, print the results, then the cost."""
    companies = ['Stripe', 'Notion', 'Linear']
    for name in companies:
        info = enrich(name)
        summary = info["description"][:60]
        headline = info["recent_news"][:60]
        print(f'{name}: {summary}...')
        print(f' News: {headline}')
        # Brief pause between companies to respect the API rate limit
        time.sleep(0.3)
    # Two searches per company at $0.005 each
    total_cost = len(companies) * 2 * 0.005
    print(f'Cost: ${total_cost:.3f}')
if __name__ == '__main__':
    main()
JavaScript Example
// API key read from the SCAVIO_API_KEY environment variable; it is
// undefined when the variable is not set.
const API_KEY = process.env.SCAVIO_API_KEY;
/**
 * Run one search query and return its organic results.
 *
 * @param {string} query - The search query string.
 * @returns {Promise<Array>} The `organic_results` array from the JSON
 *   response, or an empty array when that field is absent.
 * @throws {Error} If the API responds with a non-2xx status.
 */
async function search(query) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': API_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us' })
  });
  // fetch only rejects on network failure; check the status explicitly so
  // an error payload is not read as "no results".
  if (!resp.ok) {
    throw new Error(`Search request failed: ${resp.status} ${resp.statusText}`);
  }
  const data = await resp.json();
  return data.organic_results || [];
}
/**
 * Fetch a short description and the top news headline for a company.
 * Both searches are started together and awaited as a pair.
 *
 * @param {string} company - Company name used in both queries.
 * @returns {Promise<{description: string, news: string}>} The first overview
 *   snippet (at most 150 characters) and the first news title; each is ''
 *   when unavailable.
 */
async function enrich(company) {
  const overviewQuery = `${company} company overview`;
  const newsQuery = `${company} funding OR launch 2026`;
  const results = await Promise.all([search(overviewQuery), search(newsQuery)]);
  const topOverview = results[0][0];
  const topNews = results[1][0];
  return {
    description: topOverview?.snippet?.slice(0, 150) || '',
    news: topNews?.title || ''
  };
}
/**
 * Enrich three sample companies one after another and print, for each, a
 * truncated description and the top news headline.
 */
async function main() {
  for (const company of ['Stripe', 'Notion', 'Linear']) {
    const data = await enrich(company);
    console.log(`${company}: ${data.description.slice(0, 60)}...`);
    // enrich() also fetches a headline; print it so that search is not
    // wasted and the output matches the Python example.
    console.log(` News: ${data.news.slice(0, 60)}`);
  }
}
main().catch(console.error);
Expected Output
Loaded 50 leads from leads.csv
Enriched 10/50
Enriched 20/50
Enriched 30/50
Enriched 40/50
Enriched 50/50
Enriched 50 leads
Saved 50 enriched leads to enriched_leads.csv
Credits used: 150 ($0.75)
Stripe: Stripe is a financial infrastructure platform for businesses...
News: Stripe Raises Series I at $70B Valuation (2026)
Notion: Notion is a connected workspace for docs, wikis, and projects...
News: Notion Launches AI-Powered Project Management