Government websites contain valuable data for lead generation (licensed businesses), compliance research (regulatory filings), and market intelligence (public records). But government sites are notoriously hard to scrape: inconsistent layouts, anti-bot protections, and no APIs. This tutorial uses Scavio's Google endpoint with site-restricted queries to search government databases and extract structured results without scraping.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
Walkthrough
Step 1: Search a specific government domain
Use the site: operator to restrict a search to a single government website.
import requests, os
# Request headers carrying the Scavio API key, read from the environment.
# Raises KeyError when SCAVIO_API_KEY is not set.
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
def search_gov(query: str, gov_domain: str) -> list:
    """Search a single domain through the Scavio Google endpoint.

    Args:
        query: Search terms to look for.
        gov_domain: Domain the search is restricted to via the ``site:`` operator.

    Returns:
        The ``organic`` list from the JSON response, or an empty list when
        that key is absent.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=H,
        json={'platform': 'google', 'query': f'site:{gov_domain} {query}'},
        timeout=10,
    )
    # Fail loudly on auth or rate-limit errors instead of reporting "no results".
    resp.raise_for_status()
    return resp.json().get('organic', [])
# Search Florida business licenses:
results = search_gov('contractor license', 'myfloridalicense.com')
for r in results[:5]:
print(f"{r['title']}: {r['link']}")Step 2: Search across multiple government sources
Query several government domains for comprehensive results.
# Short source label -> domain to restrict the search to.
GOV_SOURCES = {
    'florida_licenses': 'myfloridalicense.com',
    'sec_filings': 'sec.gov',
    'fda_approvals': 'fda.gov',
    # NOTE(review): patents.google.com is not a .gov domain; confirm it is intended.
    'patent_office': 'patents.google.com',
    'census': 'census.gov',
}
def multi_gov_search(query: str) -> dict:
results = {}
for name, domain in GOV_SOURCES.items():
results[name] = search_gov(query, domain)
return resultsStep 3: Extract licensed business leads
Search state licensing databases for businesses in a specific trade.
def find_licensed_businesses(trade: str, state_domain: str) -> list:
    """Search a licensing site for a trade and return the hits as lead dicts.

    Args:
        trade: Trade to look for; ' active license' is appended to form the query.
        state_domain: Domain of the licensing site to search.

    Returns:
        One dict per search hit with ``name``, ``url`` and ``snippet`` keys,
        each defaulting to '' when the hit lacks that field.
    """
    hits = search_gov(f'{trade} active license', state_domain)
    return [
        {
            'name': hit.get('title', ''),
            'url': hit.get('link', ''),
            'snippet': hit.get('snippet', ''),
        }
        for hit in hits
    ]
# Find licensed contractors in Florida:
contractors = find_licensed_businesses('general contractor', 'myfloridalicense.com')
print(f'Found {len(contractors)} licensed contractors')Step 4: Combine with enrichment for outreach
Enrich government data leads with web search for contact information.
def enrich_gov_lead(lead: dict) -> dict:
    """Attach the top web search hit for a lead's name to the lead.

    Args:
        lead: Lead dict with a ``name`` key; it is modified in place.

    Returns:
        The same dict, with ``website`` and ``web_snippet`` taken from the
        first organic result, or '' for both when the search returns nothing.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    # Search for the business name to find their website and contact info
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=H,
        json={'platform': 'google', 'query': lead['name']},
        timeout=10,
    )
    # Surface auth or rate-limit errors instead of writing empty enrichment.
    resp.raise_for_status()
    organic = resp.json().get('organic', [])
    # Only the first hit is used; fall back to an empty hit when there is none.
    top_hit = organic[0] if organic else {}
    lead['website'] = top_hit.get('link', '')
    lead['web_snippet'] = top_hit.get('snippet', '')
    return lead
enriched = [enrich_gov_lead(l) for l in contractors[:10]]Python Example
import requests, os
# Request headers carrying the Scavio API key, read from the environment.
# Raises KeyError when SCAVIO_API_KEY is not set.
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
def search_gov(query, domain):
data = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
json={'platform': 'google', 'query': f'site:{domain} {query}'}, timeout=10).json()
return data.get('organic', [])JavaScript Example
async function searchGov(query, domain) {
const data = await fetch('https://api.scavio.dev/api/v1/search', {
method: 'POST', headers: {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'},
body: JSON.stringify({platform: 'google', query: `site:${domain} ${query}`})
}).then(r => r.json());
return data.organic || [];
}Expected Output
Structured search results from government databases, usable for lead generation, compliance research, and market intelligence.