Scraper Google Maps directement est fragile et risque des bannissements IP. Les données Google Maps apparaissent dans les résultats de recherche sous forme de listes de pack local avec le nom de l'entreprise, la note, l'adresse, le téléphone et les horaires. En effectuant des recherches via l'API Scavio à 0,005 $ par requête, vous obtenez des données structurées sur les entreprises locales sans gérer de proxys, de navigateurs sans tête ou de solveurs de CAPTCHA. Ce tutoriel construit un extracteur de données d'entreprises locales qui récupère des données de qualité Maps à partir des résultats de recherche.
Prérequis
- Python 3.9+ installé
- bibliothèque requests installée
- Une clé API Scavio depuis scavio.dev
- Une liste de catégories d'entreprises ou de lieux à rechercher
Parcours
Étape 1: Rechercher des entreprises locales via l'API de recherche
Interrogez l'API pour trouver des entreprises locales et extrayez les résultats du pack local. Ceux-ci contiennent les mêmes données que celles que vous extrairiez de Google Maps : nom, note, adresse et plus encore.
import os, requests, json
# API key read from the environment; a missing SCAVIO_API_KEY raises KeyError here.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Search endpoint that search_local_businesses POSTs to.
URL = 'https://api.scavio.dev/api/v1/search'
# Headers sent with each request: the API key plus a JSON content type.
H = {'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'}
def search_local_businesses(query: str, location: str = 'us',
                            timeout: float = 30.0) -> dict:
    """Search for local businesses and extract structured data.

    Args:
        query: Free-text search query, e.g. 'best coffee shops in Austin TX'.
        location: Value sent as the request's ``country_code`` field.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        A dict with the keys ``local_results`` (list), ``organic_results``
        (list) and ``knowledge_graph`` (dict), each taken from the JSON
        response and defaulting to an empty container when the key is absent.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    payload = {'query': query, 'country_code': location, 'num_results': 10}
    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    resp = requests.post(URL, headers=H, json=payload, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    return {
        'local_results': data.get('local_results', []),
        'organic_results': data.get('organic_results', []),
        'knowledge_graph': data.get('knowledge_graph', {}),
    }
result = search_local_businesses('best coffee shops in Austin TX')
local = result['local_results']
print(f'Found {len(local)} local results')
for biz in local[:5]:
print(f" {biz.get('title', 'N/A')}")
print(f" Rating: {biz.get('rating', 'N/A')} ({biz.get('reviews', 'N/A')} reviews)")
print(f" Address: {biz.get('address', 'N/A')}")
Étape 2: Extraire des données structurées d'entreprises
Analyser les résultats de recherche pour extraire des fiches d'entreprises propres. Combinez les données du pack local avec les extraits de résultats organiques pour des profils plus riches.
def extract_business_data(query: str, location: str = 'us') -> list:
    """Extract structured business records from search results.

    Runs one search through ``search_local_businesses`` and flattens the
    response into a list of dicts. Every record carries the same keys
    (``name``, ``rating``, ``reviews_count``, ``address``, ``phone``,
    ``hours``, ``type``, ``url``, ``source``) so the list can be written
    straight to a table; fields a source does not provide are left empty.

    Args:
        query: Free-text search query, e.g. 'plumbers in Denver CO'.
        location: Passed through to ``search_local_businesses``.

    Returns:
        Local-pack records first (``source == 'local_pack'``), followed by
        organic results that have a non-empty ``rich_snippet``
        (``source == 'organic_rich'``).
    """
    data = search_local_businesses(query, location)
    businesses = []

    # Extract from local results (Maps data). ``or []`` also covers a key
    # that is present but null.
    for biz in data.get('local_results') or []:
        businesses.append({
            'name': biz.get('title', ''),
            'rating': biz.get('rating'),
            'reviews_count': biz.get('reviews'),
            'address': biz.get('address', ''),
            'phone': biz.get('phone', ''),
            'hours': biz.get('hours', ''),
            'type': biz.get('type', ''),
            'url': '',
            'source': 'local_pack',
        })

    # Extract from organic results; only entries with a rich snippet are
    # kept, since that is where rating data is read from.
    for result in data.get('organic_results') or []:
        rich = result.get('rich_snippet') or {}
        if not rich:
            continue
        businesses.append({
            'name': result.get('title', ''),
            'rating': rich.get('rating'),
            'reviews_count': rich.get('reviews'),
            'address': '',
            'phone': '',
            'hours': '',
            'type': '',
            'url': result.get('link', ''),
            'source': 'organic_rich',
        })
    return businesses
businesses = extract_business_data('plumbers in Denver CO')
print(f'Extracted {len(businesses)} businesses')
for b in businesses[:5]:
print(f" {b['name']} - Rating: {b['rating']} ({b['source']})"
f"{' ' + b['address'] if b['address'] else ''}")
Étape 3: Extraction par lots sur plusieurs catégories
Recherchez plusieurs catégories d'entreprises dans un lieu pour constituer une base de données complète d'entreprises locales. Limitez le débit pour respecter les directives de l'API.
import time
def batch_extract(categories: list, location: str, city: str) -> list:
    """Collect businesses for several categories in one city.

    For each category a '<category> in <city>' query is run through
    ``extract_business_data``; every returned record is tagged in place with
    ``category`` and ``city``. A half-second pause follows each search.

    Returns:
        The records with duplicates removed: names are compared lower-cased
        and stripped, the first occurrence wins, and records whose name is
        empty after normalisation are dropped.
    """
    collected = []
    for category in categories:
        query = f'{category} in {city}'
        print(f'Searching: {query}')
        found = extract_business_data(query, location)
        for record in found:
            record.update(category=category, city=city)
        collected += found
        time.sleep(0.5)  # Rate limiting

    # Dicts keep insertion order and setdefault never overwrites, so this
    # retains the first record seen for each normalised name.
    first_by_name = {}
    for record in collected:
        normalized = record['name'].lower().strip()
        if normalized:
            first_by_name.setdefault(normalized, record)
    return list(first_by_name.values())
categories = ['restaurants', 'dentists', 'auto repair', 'hair salons']
businesses = batch_extract(categories, 'us', 'Portland OR')
print(f'\nTotal unique businesses: {len(businesses)}')
print(f'Cost: {len(categories)} searches = ${len(categories) * 0.005:.3f}')
for cat in categories:
count = len([b for b in businesses if b.get('category') == cat])
print(f' {cat}: {count}')
Étape 4: Exporter en CSV pour analyse
Enregistrez les données d'entreprises extraites en CSV pour une utilisation dans des feuilles de calcul, des importations CRM ou des analyses supplémentaires.
import csv
def export_businesses(businesses: list, filename: str = 'local_businesses.csv'):
    """Write business records to a CSV file and print summary statistics.

    Args:
        businesses: List of record dicts. Keys outside the exported columns
            are ignored; missing columns are written as empty cells.
        filename: Path of the CSV file to create or overwrite.

    Returns:
        None. When ``businesses`` is empty a message is printed and no file
        is written.
    """
    if not businesses:
        print('No businesses to export')
        return
    fieldnames = ['name', 'category', 'city', 'rating', 'reviews_count',
                  'address', 'phone', 'hours', 'type', 'source']
    # Encoding is explicit: the platform default may be unable to represent
    # non-ASCII business names, which would fail or garble the export.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(businesses)
    # Summary stats: only records with a truthy rating count towards the mean.
    rated = [b for b in businesses if b.get('rating')]
    avg_rating = sum(float(b['rating']) for b in rated) / len(rated) if rated else 0
    print(f'Exported {len(businesses)} businesses to {filename}')
    print(f' With ratings: {len(rated)}')
    print(f' Average rating: {avg_rating:.1f}')
    print(f' Categories: {len(set(b.get("category","") for b in businesses))}')
export_businesses(businesses, 'portland_businesses.csv')
Exemple Python
import os, requests, csv, time
# API key read from the environment; a missing SCAVIO_API_KEY raises KeyError here.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Headers sent with each request: the API key plus a JSON content type.
H = {'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'}
def get_local_businesses(query):
    """Return the ``local_results`` list for one search query.

    Args:
        query: Free-text search query, e.g. 'dentists in Austin TX'.

    Returns:
        The response's ``local_results`` value, or an empty list when the
        key is absent.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=H,
        json={'query': query, 'country_code': 'us', 'num_results': 10},
        # requests applies no timeout by default; avoid hanging forever.
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of treating an error payload as
    # "no results".
    resp.raise_for_status()
    return resp.json().get('local_results', [])
def extract_and_export(categories, city, output='businesses.csv'):
    """Search each category in ``city`` and write the results to a CSV file.

    Args:
        categories: Iterable of category strings; each becomes the query
            '<category> in <city>'.
        city: City text appended to every query.
        output: Path of the CSV file to create or overwrite.

    Returns:
        None. The file always receives a header row, even with no results.
    """
    all_biz = []
    for cat in categories:
        results = get_local_businesses(f'{cat} in {city}')
        all_biz.extend(
            {'name': r.get('title', ''), 'category': cat,
             'rating': r.get('rating', ''), 'address': r.get('address', '')}
            for r in results
        )
        time.sleep(0.3)  # pause between consecutive API calls
    # Encoding is explicit: the platform default may be unable to represent
    # non-ASCII business names.
    with open(output, 'w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=['name', 'category', 'rating', 'address'])
        w.writeheader()
        w.writerows(all_biz)
    print(f'Exported {len(all_biz)} businesses')
extract_and_export(['restaurants', 'dentists'], 'Austin TX')
Exemple JavaScript
// API key read from the environment; undefined when SCAVIO_API_KEY is not set.
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
/**
 * Fetch the local-pack results for one search query.
 *
 * @param {string} query Free-text search query, e.g. "dentists in Austin TX".
 * @returns {Promise<Array>} The response's `local_results`, or an empty
 *   array when that field is missing or falsy.
 * @throws {Error} When the API answers with a non-2xx HTTP status.
 */
async function getLocalBusinesses(query) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: 10 })
  });
  // fetch() only rejects on network failure; check the status explicitly so
  // an HTTP error payload is not mistaken for "no results".
  if (!resp.ok) {
    throw new Error(`Scavio search failed: HTTP ${resp.status}`);
  }
  const data = await resp.json();
  return data.local_results || [];
}
/**
 * Search every category in the given city, one request at a time, and log
 * the combined results to the console.
 *
 * @param {string[]} categories Category names; each becomes "<category> in <city>".
 * @param {string} city City text appended to every query.
 * @returns {Promise<void>}
 */
async function extractBusinesses(categories, city) {
  const collected = [];
  for (const category of categories) {
    const hits = await getLocalBusinesses(`${category} in ${city}`);
    for (const hit of hits) {
      collected.push({
        name: hit.title,
        category,
        rating: hit.rating,
        address: hit.address
      });
    }
  }
  console.log(`Found ${collected.length} businesses`);
  for (const entry of collected) {
    console.log(` ${entry.name} (${entry.rating}) - ${entry.category}`);
  }
}
extractBusinesses(['restaurants', 'dentists'], 'Austin TX');
Sortie attendue
Found 8 local results
Houndstooth Coffee
Rating: 4.6 (342 reviews)
Address: 401 Congress Ave, Austin, TX
Merit Coffee
Rating: 4.7 (289 reviews)
Address: 222 W 2nd St, Austin, TX
Total unique businesses: 24
Cost: 4 searches = $0.020
restaurants: 8
dentists: 6
auto repair: 5
hair salons: 5
Exported 24 businesses to portland_businesses.csv