Local lead generation at scale requires searching for businesses by type and location, extracting contact information, and qualifying leads. This tutorial builds a pipeline that searches for local businesses using SERP data, extracts websites, emails, and phone numbers from the results, and scores each lead. One search costs $0.005, and a typical city-category scan of 20 queries costs $0.10.
Prerequisites
- Python 3.9+ installed
- requests library installed
- A Scavio API key from scavio.dev
Walkthrough
Step 1: Define target locations and business categories
Set up the cities and business types to search. Each combination becomes one search query.
# Target metros and trade categories; every (city, category) pair becomes
# exactly one search query against the SERP API.
locations = [
    'Austin TX', 'Denver CO', 'Portland OR',
    'Nashville TN', 'Raleigh NC',
]
categories = [
    'plumber', 'electrician', 'HVAC contractor',
    'roofing company',
]

# One query per combination, billed at $0.005 each.
total_queries = len(locations) * len(categories)
cost = 0.005 * total_queries
print(f'{len(locations)} cities x {len(categories)} categories = {total_queries} queries')
print(f'Estimated cost: ${cost:.2f}')
Step 2: Search for local businesses
Run search queries for each location-category pair and collect the business listings from SERP results.
import requests, os, time, re
# Scavio API key is required; a KeyError here means SCAVIO_API_KEY is unset.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
def search_local(category: str, location: str) -> list:
    """Search SERP results for local businesses of *category* in *location*.

    Returns a list of lead dicts with keys: business_name, website, domain,
    snippet, category, location. Well-known aggregator/directory domains are
    skipped. Raises requests.HTTPError on a non-2xx API response.
    """
    from urllib.parse import urlparse

    # Directory/aggregator sites that are never direct business leads.
    aggregators = ('yelp.com', 'yellowpages.com', 'homeadvisor.com',
                   'angi.com', 'thumbtack.com', 'bbb.org')

    query = f'{category} in {location}'
    resp = requests.post('https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 10})
    resp.raise_for_status()

    leads = []
    for r in resp.json().get('organic_results', []):
        # urlparse handles any link shape; the previous naive
        # r['link'].split('/')[2] raised IndexError on links that contain a
        # '/' but fewer than three segments (e.g. a scheme-less 'foo/bar').
        domain = urlparse(r['link']).netloc
        if any(agg in domain for agg in aggregators):
            continue
        leads.append({
            # SERP titles are often 'Name - tagline' or 'Name | tagline'.
            'business_name': r['title'].split(' - ')[0].split(' | ')[0].strip(),
            'website': r['link'],
            'domain': domain,
            'snippet': r.get('snippet', ''),
            'category': category,
            'location': location,
        })
    return leads
# Smoke-test a single query before paying for the full city/category scan.
leads = search_local('plumber', 'Austin TX')
print(f'Found {len(leads)} leads for plumber in Austin TX')
for l in leads[:3]:
print(f' {l["business_name"]}: {l["domain"]}')
Step 3: Extract contact information from snippets
Parse phone numbers, emails, and addresses from SERP snippets. Many local business listings include contact info directly.
def extract_contacts(lead: dict) -> dict:
    """Parse phone, email, and star rating out of a lead's SERP text.

    Mutates *lead* in place (adds 'phone', 'email', 'rating' keys, using
    '' / '' / 0.0 when nothing is found) and returns it for chaining.
    """
    text = lead.get('snippet', '') + ' ' + lead.get('business_name', '')
    # US phone formats: (512) 555-1234, 512-555-1234, 512.555.1234, 5125551234.
    phones = re.findall(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', text)
    # Email addresses. The domain is one or more dot-separated labels, so a
    # sentence-ending period after the address is no longer swallowed into
    # the match (the old pattern allowed a trailing '.' in the domain).
    emails = re.findall(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+', text)
    # Rating patterns such as "4.8 stars", "4.8/5", "4.8 rating".
    ratings = re.findall(r'(\d\.\d)\s*(?:stars?|/5|rating)', text.lower())
    lead['phone'] = phones[0] if phones else ''
    lead['email'] = emails[0] if emails else ''
    lead['rating'] = float(ratings[0]) if ratings else 0.0
    return lead
# Enrich the demo leads; extract_contacts mutates and returns each dict.
enriched = [extract_contacts(l) for l in leads]
# Preview the first few enriched leads (falsy phone/rating prints as N/A).
for l in enriched[:3]:
print(f' {l["business_name"]}: phone={l["phone"] or "N/A"}, '
f'rating={l["rating"] or "N/A"}')
Step 4: Run the full pipeline and export results
Scan all location-category combinations, enrich leads, deduplicate by domain, and export to CSV.
import csv
def run_pipeline(locations: list, categories: list) -> list:
    """Search every (location, category) pair, enrich each lead with
    contact info, and return the combined list deduplicated by domain.

    The first occurrence of a domain wins; later duplicates are dropped.
    """
    unique_leads = []
    seen_domains = set()
    for location in locations:
        for category in categories:
            for lead in search_local(category, location):
                domain = lead['domain']
                if domain in seen_domains:
                    continue
                seen_domains.add(domain)
                unique_leads.append(extract_contacts(lead))
            # Brief pause between queries to stay polite to the API.
            time.sleep(0.3)
    return unique_leads
# Small smoke run: 2 cities x 2 categories = 4 queries.
leads = run_pipeline(locations[:2], categories[:2])
print(f'Total unique leads: {len(leads)}')

# Export to CSV (only when the run actually produced leads).
fieldnames = ['business_name', 'domain', 'phone', 'email', 'rating',
              'category', 'location', 'website']
if leads:
    with open('local_leads.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(leads)
    print(f'Exported to local_leads.csv')
queries_used = len(locations[:2]) * len(categories[:2])
print(f'Cost: {queries_used} queries = ${queries_used * 0.005:.3f}')
Python Example
import requests, os, re, time, csv
# Standalone example: reads the API key from the environment (KeyError if unset).
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
def find_leads(category, location):
    """Minimal end-to-end example: search, drop aggregators, pull phone numbers.

    Returns a list of {'name', 'domain', 'phone'} dicts; raises
    requests.HTTPError on a non-2xx API response.
    """
    from urllib.parse import urlparse

    resp = requests.post('https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': f'{category} in {location}', 'country_code': 'us', 'num_results': 10})
    # Fail loudly on auth/quota errors instead of parsing an error body.
    resp.raise_for_status()
    leads = []
    for r in resp.json().get('organic_results', []):
        # urlparse is safe for any link shape; the naive '/'-split raised
        # IndexError on links with a '/' but fewer than three segments.
        domain = urlparse(r['link']).netloc
        if any(a in domain for a in ('yelp.com', 'yellowpages.com', 'angi.com')):
            continue
        phones = re.findall(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', r.get('snippet', ''))
        leads.append({'name': r['title'].split(' - ')[0], 'domain': domain,
                      'phone': phones[0] if phones else ''})
    return leads
# Demo: print the first few leads for a single query.
leads = find_leads('plumber', 'Austin TX')
for l in leads[:5]:
print(f'{l["name"]}: {l["domain"]} {l["phone"]}')
JavaScript Example
// API key from the environment; undefined here means the variable is unset.
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
/**
 * Search for local businesses and return basic lead records.
 * @param {string} category - business type, e.g. 'plumber'
 * @param {string} location - city/state, e.g. 'Austin TX'
 * @returns {Promise<Array<{name: string, link: string, phone: string}>>}
 * @throws {Error} when the API responds with a non-2xx status
 */
async function findLeads(category, location) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query: `${category} in ${location}`, country_code: 'us', num_results: 10 })
  });
  // Surface auth/quota failures instead of trying to parse an error payload.
  if (!resp.ok) throw new Error(`Search request failed: ${resp.status}`);
  const data = await resp.json();
  const aggregators = ['yelp.com', 'yellowpages.com', 'angi.com'];
  return (data.organic_results || []).filter(r => {
    let domain;
    try {
      // new URL(...) throws on relative/malformed links; previously one bad
      // link crashed the whole result set.
      domain = new URL(r.link).hostname;
    } catch {
      return false; // skip results without a parseable absolute URL
    }
    return !aggregators.some(a => domain.includes(a));
  }).map(r => ({
    name: r.title.split(' - ')[0],
    link: r.link,
    phone: (r.snippet || '').match(/\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/)?.[0] || ''
  }));
}
findLeads('plumber', 'Austin TX').then(leads => leads.forEach(l => console.log(`${l.name}: ${l.phone}`)));
Expected Output
5 cities x 4 categories = 20 queries
Estimated cost: $0.10
Found 7 leads for plumber in Austin TX
Radiant Plumbing: radiantplumbing.com
ABC Home Services: abchomeservices.com
Mr Rooter Plumbing: mrrooter.com
Total unique leads: 24
Exported to local_leads.csv
Cost: 4 queries = $0.020