Apollo and similar B2B data providers lock you into expensive subscriptions with data that goes stale. A search-based enrichment layer pulls fresh data every time you need it: company descriptions from Google Knowledge Graph, tech stack signals from job postings, hiring velocity, and social presence. This tutorial builds a modular enrichment layer using the Scavio API at $0.005 per search. No annual contracts, no stale databases.
Prerequisites
- Python 3.9+ installed
- requests library installed
- A Scavio API key from scavio.dev
- A list of company domains or names to enrich
Walkthrough
Step 1: Build the company profile enricher
Search Google for each company to extract Knowledge Graph data, website description, and basic company information.
# Shared setup for the enrichment helpers below.
import os, requests, time, re
# API key comes from the environment; raises KeyError if SCAVIO_API_KEY is unset.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Single Scavio search endpoint used by every helper in this walkthrough.
URL = 'https://api.scavio.dev/api/v1/search'
# Auth + JSON content-type headers sent with every request.
H = {'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'}
def enrich_company(company: str) -> dict:
    """Build a basic company profile from a single Google search.

    Queries the Scavio API for "<company> company" and pulls fields from
    the Knowledge Graph panel, falling back to the first organic snippet
    for the description.

    Args:
        company: Company name or domain to search for.

    Returns:
        dict with 'company', 'description', 'website', 'type', 'founded',
        'headquarters', and 'employees' keys (empty strings when missing).

    Raises:
        requests.HTTPError: if the API responds with a 4xx/5xx status.
        requests.Timeout: if the request exceeds the 30s timeout.
    """
    resp = requests.post(
        URL,
        headers=H,
        json={'query': f'{company} company', 'country_code': 'us', 'num_results': 5},
        timeout=30,  # original had no timeout; a stalled connection would hang forever
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error body as JSON
    data = resp.json()
    kg = data.get('knowledge_graph', {})
    organic = data.get('organic_results', [])
    # Prefer the Knowledge Graph description; otherwise use the top result snippet.
    fallback_description = organic[0].get('snippet', '') if organic else ''
    return {
        'company': company,
        'description': kg.get('description', fallback_description),
        'website': kg.get('website', ''),
        'type': kg.get('type', ''),
        'founded': kg.get('founded', ''),
        'headquarters': kg.get('headquarters', ''),
        'employees': kg.get('employees', ''),
    }
profile = enrich_company('Vercel')
for key, val in profile.items():
if val:
print(f' {key}: {val}')Step 2: Add tech stack detection
Search for the company's job postings to detect their tech stack. Job requirements reveal what technologies the company actually uses.
def detect_tech_stack(company: str) -> dict:
    """Infer a company's tech stack from job-posting search results.

    Searches for the company's engineering job postings and scans result
    titles/snippets for known technology keywords, grouped by category.
    Job requirements reveal what the company actually uses.

    Args:
        company: Company name or domain to search for.

    Returns:
        dict with:
          tech_stack: {category: [matched technologies]}; only non-empty
            categories are included.
          hiring_signals: number of job-posting results found.
          job_titles: up to 3 truncated result titles.

    Raises:
        requests.HTTPError: if the API responds with a 4xx/5xx status.
        requests.Timeout: if the request exceeds the 30s timeout.
    """
    resp = requests.post(
        URL,
        headers=H,
        json={'query': f'{company} hiring engineer 2026',
              'country_code': 'us', 'num_results': 5},
        timeout=30,  # original had no timeout
    )
    resp.raise_for_status()
    results = resp.json().get('organic_results', [])
    all_text = ' '.join(f"{r.get('title','')} {r.get('snippet','')}" for r in results).lower()
    # Keyword lists per category, matched case-insensitively against the text.
    tech_categories = {
        'languages': ['python', 'javascript', 'typescript', 'go', 'rust', 'java', 'ruby'],
        'frameworks': ['react', 'next.js', 'django', 'fastapi', 'rails', 'spring', 'vue'],
        'databases': ['postgresql', 'mongodb', 'redis', 'mysql', 'dynamodb', 'elasticsearch'],
        'cloud': ['aws', 'gcp', 'azure', 'vercel', 'cloudflare', 'railway'],
        'tools': ['docker', 'kubernetes', 'terraform', 'github actions', 'datadog'],
    }
    detected = {}
    for category, techs in tech_categories.items():
        found = [t for t in techs if t in all_text]
        if found:
            detected[category] = found
    return {
        'tech_stack': detected,
        'hiring_signals': len(results),
        # .get guards against results lacking a 'title' key (original used r['title']).
        'job_titles': [r.get('title', '')[:60] for r in results[:3]],
    }
tech = detect_tech_stack('Vercel')
print(f'Tech Stack:')
for cat, techs in tech['tech_stack'].items():
print(f' {cat}: {", ".join(techs)}')
print(f'\nHiring signals: {tech["hiring_signals"]} job postings')Step 3: Add social presence analysis
Check the company's presence on Reddit and YouTube. Active social presence indicates a company that engages with developers and could be a good prospect.
def analyze_social(company: str) -> dict:
    """Check a company's social presence on Reddit and YouTube.

    Runs two site-restricted searches (reddit.com, youtube.com) and
    summarizes mention counts, subreddits, and recent titles.

    Args:
        company: Company name or domain to search for.

    Returns:
        dict with 'reddit' (mentions, subreddits, recent_topics) and
        'youtube' (videos, recent_videos) sub-dicts.

    Raises:
        requests.HTTPError: if the API responds with a 4xx/5xx status.
        requests.Timeout: if a request exceeds the 30s timeout.
    """
    social = {}
    subreddit_re = re.compile(r'r/(\w+)')  # compiled once, matched once per link

    # Reddit presence
    resp = requests.post(
        URL,
        headers=H,
        json={'query': f'site:reddit.com {company}',
              'country_code': 'us', 'num_results': 5},
        timeout=30,  # original had no timeout
    )
    resp.raise_for_status()
    reddit_results = resp.json().get('organic_results', [])
    # Ordered de-dupe. The original searched each link twice and collected an
    # unordered set, which made subreddit order nondeterministic.
    subreddits = []
    for r in reddit_results:
        m = subreddit_re.search(r.get('link', ''))
        if m and m.group(1) not in subreddits:
            subreddits.append(m.group(1))
    social['reddit'] = {
        'mentions': len(reddit_results),
        'subreddits': subreddits,
        # .get guards against results lacking 'title' (original used r['title']).
        'recent_topics': [r.get('title', '')[:60] for r in reddit_results[:3]],
    }
    time.sleep(0.3)  # brief pause between the two searches (rate limiting)

    # YouTube presence
    resp = requests.post(
        URL,
        headers=H,
        json={'query': f'site:youtube.com {company}',
              'country_code': 'us', 'num_results': 5},
        timeout=30,
    )
    resp.raise_for_status()
    yt_results = resp.json().get('organic_results', [])
    social['youtube'] = {
        'videos': len(yt_results),
        'recent_videos': [r.get('title', '')[:60] for r in yt_results[:3]],
    }
    return social
social = analyze_social('Vercel')
print(f'Reddit: {social["reddit"]["mentions"]} mentions in {", ".join(social["reddit"]["subreddits"][:3])}')
print(f'YouTube: {social["youtube"]["videos"]} videos')Step 4: Build the full enrichment pipeline and export
Combine all enrichment sources into a complete company profile. Export as JSON for CRM integration or CSV for manual review.
# json is used for the export below.
# NOTE(review): csv is imported but never used in this walkthrough — presumably
# kept for the CSV export option mentioned in the text; confirm or remove.
import json, csv
def full_enrichment(company: str, cost_per_search: float = 0.005) -> dict:
    """Run all enrichment steps for a company and merge the results.

    Issues 4 searches total (profile, jobs, Reddit, YouTube), sleeping
    briefly between calls to stay under rate limits.

    Args:
        company: Company name or domain.
        cost_per_search: Price per Scavio search; defaults to the $0.005
            advertised rate. Parameterized so the cost accounting stays
            correct if pricing changes.

    Returns:
        The company profile dict extended with tech stack, hiring and
        social signals, plus 'credits_used' and 'cost' accounting fields.
    """
    profile = enrich_company(company)
    time.sleep(0.3)
    tech = detect_tech_stack(company)
    time.sleep(0.3)
    social = analyze_social(company)
    credits = 4  # 1 company + 1 jobs + 1 reddit + 1 youtube
    profile.update({
        'tech_stack': tech['tech_stack'],
        'hiring_signals': tech['hiring_signals'],
        'reddit_mentions': social['reddit']['mentions'],
        'youtube_videos': social['youtube']['videos'],
        'credits_used': credits,
        # Derived from credits instead of hard-coded, so the two can't drift apart.
        'cost': credits * cost_per_search,
    })
    return profile
def batch_enrich(companies: list, output: str = 'enriched_companies.json'):
    """Enrich a list of companies and write the results to a JSON file.

    Args:
        companies: Company names or domains to enrich.
        output: Path of the JSON file to write.

    Returns:
        The list of enriched profile dicts (also written to *output*).
    """
    results = []
    for company in companies:
        print(f'Enriching: {company}...')
        profile = full_enrichment(company)
        results.append(profile)
        time.sleep(0.5)  # pause between companies to stay under rate limits
    # Explicit encoding + ensure_ascii=False keep non-ASCII company data readable
    # (the original relied on the platform default encoding and escaped it).
    with open(output, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    total_cost = sum(r['cost'] for r in results)
    print(f'\nEnriched {len(results)} companies')
    print(f'Total cost: ${total_cost:.3f}')
    print(f'Apollo equivalent: ~$50-100/month for similar data')
    return results
results = batch_enrich(['Vercel', 'Supabase', 'Railway'])
for r in results:
stack = ', '.join(t for techs in r.get('tech_stack', {}).values() for t in techs)
print(f" {r['company']}: {r.get('description', '')[:50]}")
print(f" Stack: {stack[:60]}")Python Example
import os, requests, time
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
H = {'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'}
def enrich(company):
# Company info
resp = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
json={'query': f'{company} company', 'country_code': 'us', 'num_results': 3})
kg = resp.json().get('knowledge_graph', {})
time.sleep(0.3)
# Tech stack from jobs
resp2 = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
json={'query': f'{company} hiring engineer', 'country_code': 'us', 'num_results': 3})
text = ' '.join(r.get('snippet','') for r in resp2.json().get('organic_results', [])).lower()
techs = [t for t in ['python','react','typescript','aws','docker'] if t in text]
print(f"{company}: {kg.get('description','N/A')[:60]}")
print(f" Tech: {', '.join(techs) or 'N/A'}")
for c in ['Vercel', 'Supabase']:
enrich(c)
time.sleep(0.3)JavaScript Example
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
async function enrich(company) {
const resp = await fetch('https://api.scavio.dev/api/v1/search', {
method: 'POST',
headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
body: JSON.stringify({ query: `${company} company`, country_code: 'us', num_results: 3 })
});
const kg = (await resp.json()).knowledge_graph || {};
const resp2 = await fetch('https://api.scavio.dev/api/v1/search', {
method: 'POST',
headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
body: JSON.stringify({ query: `${company} hiring engineer`, country_code: 'us', num_results: 3 })
});
const text = ((await resp2.json()).organic_results || []).map(r => r.snippet || '').join(' ').toLowerCase();
const techs = ['python','react','typescript','aws','docker'].filter(t => text.includes(t));
console.log(`${company}: ${(kg.description || 'N/A').slice(0, 60)}`);
console.log(` Tech: ${techs.join(', ') || 'N/A'}`);
}
(async () => { for (const c of ['Vercel', 'Supabase']) await enrich(c); })();Expected Output
Enriching: Vercel...
Enriching: Supabase...
Enriching: Railway...
Enriched 3 companies
Total cost: $0.060
Apollo equivalent: ~$50-100/month for similar data
Vercel: Cloud platform for frontend frameworks and serverle
Stack: typescript, react, next.js, aws
Supabase: Open source Firebase alternative with PostgreSQL
Stack: typescript, postgresql, docker
Railway: Cloud platform for deploying applications
Stack: typescript, docker, kubernetes