Using search API data in production requires understanding what you can legally do with the results. Scraping Google directly violates their ToS, but using a compliant SERP API that accesses public search results through authorized methods is a different matter. This tutorial walks through a compliance audit checklist for the major platforms: Google, Amazon, Reddit, YouTube, and TikTok.
Prerequisites
- Your current search API documentation
- A Scavio API key from scavio.dev for testing
- Basic understanding of API Terms of Service
Walkthrough
Step 1: Define the compliance checklist
Create a structured checklist covering key legal areas for search API usage.
import json
# Audit items grouped by legal area. Each entry is a statement that should
# hold true for the search API integration; the audit report lists them
# for manual review.
CHECKLIST = {
    # How the data is obtained.
    'data_collection': [
        'API provider has explicit right to serve this data',
        'No direct scraping of protected platforms by your code',
        'Rate limits are respected and documented',
        'API key is stored securely (env vars, not hardcoded)',
    ],
    # What is done with results once they are retrieved.
    'data_usage': [
        'Results displayed with attribution to source',
        'No bulk redistribution of raw search results',
        'Cached results have reasonable TTL (hours, not permanent)',
        'Personal data in results handled per privacy laws',
    ],
    # What is kept, for how long, and how it can be removed.
    'data_storage': [
        'Raw API responses not stored permanently without justification',
        'User search queries logged with consent',
        'Data retention policy documented',
        'Deletion mechanism exists for stored results',
    ],
    # What is passed on to clients or the public.
    'redistribution': [
        'Results not resold as a competing data product',
        'Aggregated insights (not raw data) shared with clients',
        'Attribution maintained in any public-facing use',
        'API provider ToS allows your specific use case',
    ],
}
print('Compliance Checklist Areas:')
for area, items in CHECKLIST.items():
print(f' {area}: {len(items)} items')
Step 2: Verify API provider compliance status
Check what your API provider's documentation says about data rights.
import os, requests
SH = {'x-api-key': os.environ['SCAVIO_API_KEY'], 'Content-Type': 'application/json'}
def check_api_health(timeout=10):
    """Send one probe search and report basic facts about the response.

    Prints the HTTP status code, whether any response header name contains
    "rate", and the response content type.

    Args:
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the audit indefinitely.

    Returns:
        True if the API answered with HTTP 200, otherwise False.

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when the timeout expires.
    """
    response = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=SH,
        json={'query': 'test compliance check', 'country_code': 'us'},
        timeout=timeout,
    )
    # Use the case-insensitive mapping requests provides as-is. Copying it
    # into a plain dict keeps the server's original key casing, so a lookup
    # for "content-type" would miss a header sent as "Content-Type".
    headers = response.headers
    print('API Response Check:')
    print(f' Status: {response.status_code}')
    print(f' Rate limit headers present: {any("rate" in k.lower() for k in headers)}')
    print(f' Content-Type: {headers.get("content-type", "unknown")}')
    return response.status_code == 200
def document_usage():
    """Print and return a record of which search data is accessed and why.

    Returns:
        A dict with the keys 'platforms_used', 'data_types', 'purpose',
        'storage' and 'redistribution', in that order.
    """
    record = dict(
        platforms_used=['google', 'reddit', 'amazon', 'youtube'],
        data_types=['organic_results', 'local_results', 'ai_overview'],
        purpose='Market research and competitive analysis',
        storage='Aggregated insights only, raw results cached 24h',
        redistribution='Internal use only, no raw data resale',
    )
    print('\nDocumented Usage:')
    # One line per field, in declaration order.
    for field in record:
        print(f' {field}: {record[field]}')
    return record
check_api_health()
document_usage()
Step 3: Test rate limiting and error handling
Verify your code handles rate limits and errors gracefully.
import time
def test_rate_handling():
    """Send five probe searches and report how rate limits and errors surface.

    The outcome of every request is printed. An HTTP 429 response triggers
    a two-second pause before the next request. After the loop, the success
    rate (share of HTTP 200 responses) is printed, and rate limit handling
    is reported as PASS when a 429 was observed or every request returned
    200, and as NOT TESTED otherwise.
    """
    results = []
    for i in range(5):
        # Only the network call sits inside the try, so failures in the
        # reporting code below are not mistaken for request errors.
        try:
            r = requests.post(
                'https://api.scavio.dev/api/v1/search',
                headers=SH,
                json={'query': f'compliance test {i}', 'country_code': 'us'},
                # Bound each request so a stalled connection cannot hang the run.
                timeout=10,
            )
        except requests.RequestException as e:
            print(f' Request {i+1}: Error - {e}')
            results.append({'status': 0, 'ok': False})
            continue

        results.append({'status': r.status_code, 'ok': r.status_code == 200})
        if r.status_code == 429:
            print(f' Request {i+1}: Rate limited (429). Backing off...')
            time.sleep(2)
        elif r.status_code == 200:
            print(f' Request {i+1}: {r.status_code} OK')
        else:
            # Any other status (4xx/5xx, redirects) is not a success and
            # must not be labelled "OK".
            print(f' Request {i+1}: {r.status_code} Unexpected status')

    success_rate = sum(1 for r in results if r['ok']) / len(results) * 100
    print(f'\nSuccess rate: {success_rate:.0f}%')
    print(f'Rate limit handling: {"PASS" if any(r["status"] == 429 for r in results) or success_rate == 100 else "NOT TESTED"}')
test_rate_handling()
Step 4: Generate compliance audit report
Run the full audit and output a compliance status report.
def run_audit(checklist=None):
    """Print a compliance audit report listing every checklist item.

    Every item is printed with the status REVIEW, followed by a summary
    with the total item count and a fixed set of key notes.

    Args:
        checklist: Mapping of area name to an iterable of item
            descriptions. Defaults to the module-level CHECKLIST.
    """
    # Imported locally so the function does not depend on a file-level
    # datetime import.
    from datetime import date

    if checklist is None:
        checklist = CHECKLIST

    print('\n=== Search API Compliance Audit ===')
    print(f'Date: {date.today().isoformat()}')
    total_items = 0
    for area, items in checklist.items():
        print(f'\n {area.upper().replace("_", " ")}:')
        for item in items:
            # In production, these would be manual checks or automated tests
            status = 'REVIEW'
            print(f' [{status}] {item}')
            total_items += 1
    print('\n SUMMARY:')
    print(f' Total items: {total_items}')
    print(' Status: Manual review required')
    print(' Recommendation: Document each item status and review quarterly')
    print('\n KEY NOTES:')
    print(' - Structured APIs like Scavio access public search results without direct scraping')
    print(' - Each platform has different ToS for data usage and redistribution')
    print(' - Consult legal counsel for commercial use of search data at scale')
    print(' - Keep API responses cached short-term, store only aggregated insights long-term')
run_audit()
Python Example
import os, requests
SH = {'x-api-key': os.environ['SCAVIO_API_KEY'], 'Content-Type': 'application/json'}
def compliance_check(timeout=10):
    """Send one test search and print a short manual compliance checklist.

    Prints the HTTP status code and content type of the response, followed
    by four unchecked checklist items.

    Args:
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the check indefinitely.

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when the timeout expires.
    """
    r = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=SH,
        json={'query': 'test', 'country_code': 'us'},
        timeout=timeout,
    )
    print(f'API status: {r.status_code}')
    print(f'Response type: {r.headers.get("content-type")}')
    print('\nChecklist:')
    for item in ['API key in env var', 'Rate limits respected', 'No raw data resale', 'Attribution maintained']:
        print(f' [ ] {item}')
compliance_check()
JavaScript Example
const SH = { 'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json' };
/**
 * Sends one test search and prints a short manual compliance checklist.
 *
 * Logs the HTTP status and content type of the response, followed by the
 * unchecked checklist items. Rejects if the request fails or takes longer
 * than ten seconds.
 */
async function complianceCheck() {
  const r = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: SH,
    body: JSON.stringify({ query: 'test', country_code: 'us' }),
    // Abort after 10 s so a stalled connection cannot hang the check.
    signal: AbortSignal.timeout(10000)
  });
  console.log(`API status: ${r.status}`);
  console.log(`Content-Type: ${r.headers.get('content-type')}`);
  // Same four items as the Python example's checklist.
  const items = [
    'API key in env var',
    'Rate limits respected',
    'No raw data resale',
    'Attribution maintained'
  ];
  for (const item of items) {
    console.log(` [ ] ${item}`);
  }
}
complianceCheck().catch(console.error);
Expected Output
Compliance Checklist Areas:
data_collection: 4 items
data_usage: 4 items
data_storage: 4 items
redistribution: 4 items
API Response Check:
Status: 200
Rate limit headers present: True
Content-Type: application/json
=== Search API Compliance Audit ===
Date: 2026-05-19
DATA COLLECTION:
[REVIEW] API provider has explicit right to serve this data
[REVIEW] No direct scraping of protected platforms by your code
SUMMARY:
Total items: 16
Status: Manual review required