Replace Octoparse for Google Maps data extraction by switching from browser-based scraping to a search API that returns structured business data directly. Octoparse requires configuring visual scraping workflows, managing browser instances, and handling anti-bot measures that Google continuously updates. A search API returns the same local business data as structured JSON with no browser overhead, no anti-bot issues, and no workflow maintenance. The switch eliminates the Octoparse Standard plan cost ($69/mo) while providing more reliable data access.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
- Business categories and locations to search
Walkthrough
Step 1: Search for local businesses
Query Google through the API to get local business listings that Octoparse would scrape.
import os, requests, json
API_KEY = os.environ['SCAVIO_API_KEY']
def search_local(query: str, location: str) -> list:
    """Search Google through the Scavio API for ``query`` in ``location``.

    Sends a single POST request whose query string is
    ``'{query} in {location}'`` and maps every entry of the response's
    ``organic_results`` list to a small dict.

    Args:
        query: Business category or search phrase, e.g. ``'plumbers'``.
        location: Free-text location, e.g. ``'Austin TX'``.

    Returns:
        A list of dicts with the keys ``name``, ``url``, ``snippet`` and
        ``source``; a field missing from a result defaults to ``''``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    full_query = f'{query} in {location}'
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY},
        json={'platform': 'google', 'query': full_query},
        timeout=15,
    )
    # Surface auth/quota/server errors instead of silently reporting
    # zero results parsed from an error payload.
    resp.raise_for_status()
    data = resp.json()
    # `or []` also covers an explicit null for the key in the payload.
    results = data.get('organic_results') or []
    return [
        {
            'name': r.get('title', ''),
            'url': r.get('link', ''),
            'snippet': r.get('snippet', ''),
            'source': r.get('source', ''),
        }
        for r in results
    ]
businesses = search_local('plumbers', 'Austin TX')
print(f'Found {len(businesses)} results')
for b in businesses[:3]:
    print(f" {b['name'][:50]}")
Step 2: Extract business details
Parse business information from search result snippets, including phone numbers, ratings, and review counts.
import re
def extract_business_details(result: dict) -> dict:
    """Pull phone, rating and review count out of a search result's text.

    The snippet and the business name are scanned with regular
    expressions; anything that cannot be found falls back to an empty or
    zero value rather than raising.

    Args:
        result: A search result dict. The name is read from ``name`` or
            ``title``, the URL from ``url`` or ``link``, and the free
            text from ``snippet``. Missing or ``None`` values are
            treated as empty strings.

    Returns:
        A dict with the keys ``name``, ``phone`` (matched text or ``''``),
        ``rating`` (float, or ``0`` when absent), ``reviews`` (int),
        ``url`` and ``snippet`` (truncated to 200 characters).
    """
    # `or` chains tolerate both a missing key and an explicit None value.
    name = result.get('name') or result.get('title') or ''
    url = result.get('url') or result.get('link') or ''
    snippet = result.get('snippet') or ''

    text = f'{snippet} {name}'
    lowered = text.lower()

    # The digit guards on both sides stop the pattern from matching ten
    # digits in the middle of a longer number (order IDs, timestamps, ...).
    phone_match = re.search(
        r'(?<!\d)\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)', text)
    phone = phone_match.group() if phone_match else ''

    rating_match = re.search(r'(\d+\.?\d*)\s*(?:stars?|rating|/5)', lowered)
    rating = float(rating_match.group(1)) if rating_match else 0

    # The captured group holds only digits and commas, so it is always a
    # valid integer once the thousands separators are removed.
    review_match = re.search(r'(\d[\d,]*)\s*reviews?', lowered)
    reviews = int(review_match.group(1).replace(',', '')) if review_match else 0

    return {
        'name': name,
        'phone': phone,
        'rating': rating,
        'reviews': reviews,
        'url': url,
        'snippet': snippet[:200],
    }
for b in businesses[:3]:
    details = extract_business_details(b)
    print(f" {details['name'][:40]}: rating={details['rating']}, phone={details['phone']}")
Step 3: Search multiple categories
Batch-search across multiple business categories and locations.
import time
def multi_category_search(categories: list, locations: list,
                          delay: float = 0.3) -> list:
    """Run one search per (location, category) pair and collect the details.

    Every result returned by ``search_local`` is passed through
    ``extract_business_details`` and tagged with the category and location
    that produced it. A progress line is printed after each search.

    Args:
        categories: Business categories to search for.
        locations: Locations to search in.
        delay: Seconds to pause after each search; ``0`` or a negative
            value disables the pause. Defaults to 0.3.

    Returns:
        A flat list of detail dicts, each carrying additional
        ``category`` and ``location`` keys.
    """
    all_results = []
    for location in locations:
        for category in categories:
            businesses = search_local(category, location)
            for b in businesses:
                details = extract_business_details(b)
                details['category'] = category
                details['location'] = location
                all_results.append(details)
            print(f'{category} in {location}: {len(businesses)} results')
            # Pause between requests; skipped for non-positive delays
            # because time.sleep() rejects negative values.
            if delay > 0:
                time.sleep(delay)
    return all_results
categories = ['plumbers', 'electricians', 'HVAC repair']
locations = ['Austin TX', 'Houston TX']
results = multi_category_search(categories, locations)
print(f'\nTotal businesses found: {len(results)}')
Step 4: Deduplicate businesses
Remove duplicate listings that appear across multiple searches.
def deduplicate_businesses(businesses: list) -> list:
    """Drop entries whose normalized name has already been seen.

    Two entries count as duplicates when their names match after
    lower-casing, stripping surrounding whitespace and truncating to 40
    characters. The first occurrence is kept and input order is
    preserved. A one-line summary is printed.

    Args:
        businesses: Dicts with a ``name`` entry. A missing or ``None``
            name is treated as an empty string.

    Returns:
        A new list containing the first entry for each distinct name key.
    """
    seen = set()
    deduped = []
    for b in businesses:
        # `or ''` keeps a missing/None name from raising on .lower().
        key = (b.get('name') or '').lower().strip()[:40]
        if key in seen:
            continue
        seen.add(key)
        deduped.append(b)
    removed = len(businesses) - len(deduped)
    print(f'Deduplication: {len(businesses)} -> {len(deduped)} ({removed} removed)')
    return deduped
deduped = deduplicate_businesses(results)
Step 5: Export to CSV
Save the extracted business data as a CSV, matching the format Octoparse would have exported.
import csv
def export_businesses(businesses: list, output_path: str) -> None:
    """Write the business dicts to a CSV file and print a confirmation.

    The columns are fixed to name, category, location, phone, rating,
    reviews, url and snippet. Keys outside that set are ignored, and
    missing keys are written as empty cells.

    Args:
        businesses: Dicts to export, one row each.
        output_path: Destination file path; an existing file is
            overwritten.
    """
    fields = ['name', 'category', 'location', 'phone', 'rating', 'reviews', 'url', 'snippet']
    # An explicit UTF-8 encoding keeps non-ASCII business names intact
    # regardless of the platform's default locale encoding.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fields, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(businesses)
    print(f'Exported {len(businesses)} businesses to {output_path}')
export_businesses(deduped, 'local_businesses.csv')
# Compare with Octoparse:
# Octoparse: $69/mo Standard, browser-based, anti-bot issues
# Scavio API: $30/mo for 7K credits, no browser, structured JSON
print('\nOctoparse replacement complete')
Python Example
import requests, os
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
def local_search(category, location):
    """Return up to five name/url pairs for ``category`` in ``location``.

    Posts ``'{category} in {location}'`` to the Scavio search endpoint and
    keeps the first five entries of ``organic_results``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=H,
        json={'platform': 'google', 'query': f'{category} in {location}'},
        # Without a timeout requests can block forever on a stalled
        # connection; 15 s matches the walkthrough's Step 1 request.
        timeout=15,
    )
    # Fail loudly on auth/quota/server errors instead of returning [].
    resp.raise_for_status()
    data = resp.json()
    return [
        {'name': r.get('title', ''), 'url': r.get('link', '')}
        for r in (data.get('organic_results') or [])[:5]
    ]
print(local_search('plumbers', 'Austin TX'))
JavaScript Example
const H = {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'};
/**
 * Search Google through the Scavio API for `category` in `location`.
 *
 * @param {string} category Business category, e.g. 'plumbers'.
 * @param {string} location Free-text location, e.g. 'Austin TX'.
 * @returns {Promise<Array<{name: string, url: string}>>} Up to five results.
 * @throws {Error} If the API responds with a non-2xx status.
 */
async function localSearch(category, location) {
  const r = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST', headers: H,
    body: JSON.stringify({platform: 'google', query: `${category} in ${location}`})
  });
  // fetch() only rejects on network failure; check the status explicitly
  // so auth/quota errors are not reported as an empty result list.
  if (!r.ok) {
    throw new Error(`Scavio search failed with HTTP ${r.status}`);
  }
  const data = await r.json();
  return (data.organic_results || []).slice(0, 5)
    .map(item => ({name: item.title, url: item.link}));
}
localSearch('plumbers', 'Austin TX').then(console.log);
Expected Output
A local business data extraction pipeline that replaces Octoparse browser scraping with API-based search, producing the same CSV output without browser automation overhead.