Apollo and ZoomInfo charge $100+/month for local business data. For basic SMB discovery -- finding businesses by category and location -- a search API gives you names, websites, and contact info from Google results at $0.005/query. This tutorial builds a local SMB pipeline that discovers, categorizes, and exports business leads.
Prerequisites
- Python 3.8+
- requests library
- A Scavio API key from scavio.dev
- Target locations and business categories
Walkthrough
Step 1: Search for local businesses by category and location
Query Google for businesses in specific categories and locations.
import os, requests, json, csv
from datetime import datetime
from collections import defaultdict
# The API key is read from the environment; a missing SCAVIO_API_KEY
# raises KeyError at this line.
API_KEY = os.environ['SCAVIO_API_KEY']
# Request headers sent by find_businesses with each search call.
SH = {'x-api-key': API_KEY, 'Content-Type': 'application/json'}
# Every category is searched in every location, so the number of
# queries is len(CATEGORIES) * len(LOCATIONS).
CATEGORIES = ['plumber', 'electrician', 'HVAC contractor']
LOCATIONS = ['Austin TX', 'Denver CO', 'Nashville TN']
def find_businesses(category, location):
    """Search for businesses of one category in one location.

    Posts the query ``'<category> in <location>'`` to the search endpoint
    and converts each organic result into a lead dict. Results whose link
    points at a directory site (Yelp, Yellow Pages, Facebook, Google Maps)
    are skipped.

    Args:
        category: Business category to search for, e.g. ``'plumber'``.
        location: Free-text location appended to the query,
            e.g. ``'Austin TX'``.

    Returns:
        A list of dicts with the keys ``name``, ``website``, ``domain``,
        ``description``, ``category`` and ``location``.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    from urllib.parse import urlparse

    directory_markers = ('yelp.com', 'yellowpages', 'facebook.com', 'google.com/maps')
    query = f'{category} in {location}'
    response = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=SH,
        json={'query': query, 'country_code': 'us'},
        timeout=10,
    )
    # Fail loudly on auth/quota errors instead of reporting "0 businesses".
    response.raise_for_status()
    data = response.json()

    businesses = []
    for r in data.get('organic_results') or []:
        # `or ''` also covers fields that are present but null.
        link = r.get('link') or ''
        if any(marker in link for marker in directory_markers):
            continue  # Skip directories
        title = r.get('title') or ''
        displayed = r.get('displayed_link') or ''
        # Take the host from the result URL. Splitting the display string on
        # '/' yields 'https:' whenever that string starts with a scheme, which
        # would collapse every lead onto one dedup key. The old parse is kept
        # only as a fallback for results without a usable link.
        domain = urlparse(link).netloc or displayed.split('/')[0]
        businesses.append({
            # Keep the part of the title before the first ' - ' or ' | '.
            'name': title.split(' - ')[0].split(' | ')[0].strip(),
            'website': link,
            'domain': domain,
            'description': (r.get('snippet') or '')[:120],
            'category': category,
            'location': location,
        })
    return businesses
# Run one search per (category, location) pair, in category-major order,
# and pool every result into a single list.
all_leads = []
for cat, loc in ((c, place) for c in CATEGORIES for place in LOCATIONS):
    leads = find_businesses(cat, loc)
    all_leads += leads
    print(f' {cat:20} in {loc:15} | {len(leads)} businesses')

print(f'\nTotal leads: {len(all_leads)}')
print(f'Cost: ${len(CATEGORIES) * len(LOCATIONS) * 0.005:.3f}')
Step 2: Deduplicate and enrich leads
Remove duplicates and add business category tags.
def deduplicate_leads(leads):
    """Return the leads with duplicate domains removed.

    The first lead seen for each domain is kept and input order is
    preserved. Domains are compared case-insensitively and without a
    leading ``www.``. When a lead has no ``domain``, the host of its
    ``website`` URL is used as the key instead, so a lead with a valid
    website is not discarded. Leads with neither are dropped because they
    cannot be identified.

    Args:
        leads: List of lead dicts carrying ``domain`` and/or ``website``.

    Returns:
        A new list containing the unique leads. The dicts are not copied.
    """
    from urllib.parse import urlparse

    seen_domains = set()
    unique = []
    for lead in leads:
        domain = (lead.get('domain') or '').strip().lower()
        if not domain:
            # Fall back to the website host rather than losing the lead.
            domain = urlparse(lead.get('website') or '').netloc.lower()
        # 'www.example.com' and 'example.com' are the same business.
        if domain.startswith('www.'):
            domain = domain[4:]
        if domain and domain not in seen_domains:
            seen_domains.add(domain)
            unique.append(lead)
    print(f'Deduplication: {len(leads)} -> {len(unique)} unique leads')
    return unique
def enrich_lead(lead):
    """Tag a lead with service traits mentioned in its description.

    The lower-cased ``description`` is scanned for a fixed set of keyword
    groups. Each group that matches (by substring) contributes one tag, in
    the order the groups are listed. The result is stored under ``'tags'``
    on the same dict, which is also returned.

    Args:
        lead: Lead dict with a ``description`` string.

    Returns:
        The same ``lead`` dict, with a ``tags`` list added.
    """
    keyword_groups = (
        ('emergency_service', ('24/7', 'emergency', '24 hour')),
        ('licensed', ('licensed', 'certified', 'insured')),
        ('offers_estimates', ('free estimate', 'free quote')),
        ('residential', ('residential', 'home')),
        ('commercial', ('commercial', 'business')),
    )
    text = lead['description'].lower()
    lead['tags'] = [
        tag
        for tag, keywords in keyword_groups
        if any(keyword in text for keyword in keywords)
    ]
    return lead
# Collapse duplicates first, then tag each surviving lead.
unique_leads = deduplicate_leads(all_leads)
enriched = list(map(enrich_lead, unique_leads))

print(f'\nEnriched leads by category:')
# Group the enriched leads under their search category for the report.
by_cat = defaultdict(list)
for lead in enriched:
    bucket = by_cat[lead['category']]
    bucket.append(lead)
for cat, leads in by_cat.items():
tagged = sum(1 for l in leads if l['tags'])
print(f' {cat:20} | {len(leads)} leads | {tagged} enriched')
Step 3: Export leads to CSV
Export the cleaned lead list to CSV for CRM import.
def export_leads_csv(leads, filename=None):
    """Write the leads to a CSV file and print a run summary.

    Args:
        leads: List of lead dicts. Keys outside the exported columns are
            ignored and missing columns are written as empty strings.
        filename: Output path. When empty or ``None``, defaults to
            ``smb_leads_<YYYYMMDD>.csv`` using today's local date.

    Returns:
        None. The CSV is written to disk and the summary goes to stdout.
    """
    if not filename:
        filename = f'smb_leads_{datetime.now().strftime("%Y%m%d")}.csv'
    fieldnames = ['name', 'website', 'domain', 'category', 'location', 'tags', 'description']
    # Explicit UTF-8 so non-ASCII business names do not depend on the
    # platform's default encoding.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        # Let DictWriter drop extra keys and blank-fill missing ones
        # instead of filtering each row by hand.
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval='', extrasaction='ignore')
        writer.writeheader()
        for lead in leads:
            # Tags are a list on the lead; flatten them into one cell.
            writer.writerow({**lead, 'tags': ', '.join(lead.get('tags', []))})
    print(f'\nExported {len(leads)} leads to {filename}')

    # Summary
    categories = {lead.get('category') for lead in leads}
    locations = {lead.get('location') for lead in leads}
    tagged = sum(1 for lead in leads if lead.get('tags'))
    # One query per category/location pair at $0.005 each.
    run_cost = len(CATEGORIES) * len(LOCATIONS) * 0.005
    print(f'\n=== SMB Discovery Summary ===')
    print(f' Categories: {len(categories)}')
    print(f' Locations: {len(locations)}')
    print(f' Total unique leads: {len(leads)}')
    print(f' With tags: {tagged}')
    print(f'\n Apollo: $49-119/mo for local business data')
    print(f' ZoomInfo: $250+/mo for SMB data')
    print(f' This pipeline: ${run_cost:.3f}/run')
    print(f' Monthly (daily): ${run_cost * 30:.2f}')
export_leads_csv(enriched)
Python Example
import os, requests
# Request headers for the search call; raises KeyError here if
# SCAVIO_API_KEY is not set in the environment.
SH = {'x-api-key': os.environ['SCAVIO_API_KEY'], 'Content-Type': 'application/json'}
def find_smbs(category, location):
    """Print the first five organic results for ``'<category> in <location>'``.

    Each result is printed as its title (truncated to 40 characters)
    followed by its displayed link.

    Args:
        category: Business category to search for, e.g. ``'plumber'``.
        location: Free-text location appended to the query.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    response = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=SH,
        json={'query': f'{category} in {location}', 'country_code': 'us'},
        timeout=10,
    )
    # Surface auth/quota failures instead of printing an empty result list.
    response.raise_for_status()
    data = response.json()
    for r in data.get('organic_results', [])[:5]:
        print(f' {r.get("title", "")[:40]} | {r.get("displayed_link", "")}')
# Example call: a single query for plumbers in Austin, TX.
find_smbs('plumber', 'Austin TX')
print('Cost: $0.005')
JavaScript Example
// Headers sent with the search request; the API key comes from the environment.
const SH = {
  'x-api-key': process.env.SCAVIO_API_KEY,
  'Content-Type': 'application/json',
};

// POST one search query, then decode the JSON response body.
const response = await fetch('https://api.scavio.dev/api/v1/search', {
  method: 'POST',
  headers: SH,
  body: JSON.stringify({ query: 'plumber in Austin TX', country_code: 'us' }),
});
const data = await response.json();
(data.organic_results || []).slice(0, 5).forEach(r => {
console.log(`${r.title?.slice(0, 40)} | ${r.displayed_link}`);
});
Expected Output
plumber in Austin TX | 6 businesses
plumber in Denver CO | 5 businesses
electrician in Austin TX | 7 businesses
HVAC contractor in Nashville TN | 4 businesses
Total leads: 48
Cost: $0.045
Deduplication: 48 -> 42 unique leads
Exported 42 leads to smb_leads_20260521.csv
=== SMB Discovery Summary ===
Categories: 3
Locations: 3
Total unique leads: 42
Apollo: $49-119/mo for local business data
This pipeline: $0.045/run
Monthly (daily): $1.35