Cheaper search does not help if the results are poor. This tutorial builds an automated benchmark that scores search API result quality using relevance metrics and calculates a quality-per-dollar ratio. You test the same queries across multiple providers and get a ranked comparison. The benchmark uses keyword overlap, expected-domain matching, and result-count scoring -- no LLM judge needed.
Prerequisites
- Python 3.9+ installed
- API keys for at least two search providers
- requests library installed
Walkthrough
Step 1: Define benchmark queries with expected results
Create a test set of queries where you know what good results look like. Include the expected top domains and key terms that should appear.
# Benchmark set: each entry pairs a search query with the domains and the key
# terms that a good result page for that query is expected to contain.
benchmark_queries = [
    dict(
        query='python asyncio tutorial 2026',
        expected_domains=['docs.python.org', 'realpython.com', 'stackoverflow.com'],
        expected_terms=['asyncio', 'await', 'coroutine', 'event loop'],
    ),
    dict(
        query='next.js app router server components',
        expected_domains=['nextjs.org', 'vercel.com', 'react.dev'],
        expected_terms=['server components', 'app router', 'next.js'],
    ),
    dict(
        query='best CRM software small business 2026',
        expected_domains=['forbes.com', 'g2.com', 'capterra.com'],
        expected_terms=['CRM', 'pricing', 'small business', 'features'],
    ),
    dict(
        query='docker compose networking tutorial',
        expected_domains=['docs.docker.com', 'stackoverflow.com'],
        expected_terms=['docker', 'compose', 'network', 'bridge'],
    ),
    dict(
        query='react useEffect cleanup function',
        expected_domains=['react.dev', 'stackoverflow.com', 'kentcdodds.com'],
        expected_terms=['useEffect', 'cleanup', 'unmount', 'return'],
    ),
]
print(f'{len(benchmark_queries)} benchmark queries defined')
Step 2: Build the quality scoring function
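Score each result set on domain match (up to 40 points), term coverage across titles and snippets (up to 40 points), and result count (up to 20 points). The function returns a 0-100 quality score together with the three component scores.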
def _link_host(link):
    """Return the lower-cased host name of a result link, or '' if there is none."""
    from urllib.parse import urlsplit

    if not isinstance(link, str) or not link:
        return ''
    # urlsplit only fills in the host when the authority part is introduced by
    # '//', so scheme-less links such as 'example.com/page' need the prefix.
    if '://' not in link and not link.startswith('//'):
        link = '//' + link
    try:
        return (urlsplit(link).hostname or '').lower()
    except ValueError:
        # Malformed authority (e.g. an unbalanced IPv6 bracket).
        return ''


def _host_matches(host, expected_domain):
    """Return True if host equals expected_domain or is a subdomain of it."""
    expected_domain = expected_domain.lower()
    return host == expected_domain or host.endswith('.' + expected_domain)


def score_results(results: list, expected_domains: list, expected_terms: list) -> dict:
    """Score one result list against the expectations for its query.

    Only the first 10 results are inspected for domains and terms. The total
    is the sum of three components:

    * ``domain_score`` (0-40): fraction of ``expected_domains`` that appear as
      the host (or a parent domain of the host) of at least one result link.
    * ``term_score`` (0-40): fraction of ``expected_terms`` that occur,
      case-insensitively, in the combined titles and snippets.
    * ``count_score`` (0-20): number of results returned, saturating at 10.

    Args:
        results: Result dicts; the ``title``, ``link`` and ``snippet`` keys
            are read and may be missing or ``None``.
        expected_domains: Domains a good result set should include.
        expected_terms: Terms a good result set should mention.

    Returns:
        A dict with ``total``, ``domain_score``, ``term_score`` and
        ``count_score``, each rounded to one decimal place. All four are 0
        when ``results`` is empty.
    """
    if not results:
        return {'total': 0, 'domain_score': 0, 'term_score': 0, 'count_score': 0}

    top_results = results[:10]

    # Domain match score (0-40 points). Hosts are compared on label
    # boundaries so 'stackoverflow.com' does not match 'notstackoverflow.com'.
    hosts = {host for host in (_link_host(r.get('link')) for r in top_results) if host}
    domain_hits = sum(
        1 for expected in expected_domains
        if any(_host_matches(host, expected) for host in hosts)
    )
    domain_score = min(domain_hits / max(len(expected_domains), 1) * 40, 40)

    # Term coverage score (0-40 points). `or ''` tolerates explicit None values.
    all_text = ' '.join(
        f"{r.get('title') or ''} {r.get('snippet') or ''}" for r in top_results
    ).lower()
    term_hits = sum(1 for term in expected_terms if term.lower() in all_text)
    term_score = min(term_hits / max(len(expected_terms), 1) * 40, 40)

    # Result count score (0-20 points); uses the full list length, capped at 20.
    count_score = min(len(results) / 10 * 20, 20)

    total = round(domain_score + term_score + count_score, 1)
    return {
        'total': total,
        'domain_score': round(domain_score, 1),
        'term_score': round(term_score, 1),
        'count_score': round(count_score, 1),
    }
print('Quality scorer ready')
Step 3: Run benchmarks across providers and compute quality-per-dollar
Execute each benchmark query against each provider, score the results, and calculate the quality-per-dollar ratio.
import requests, os, time
# API key read from the environment; falls back to an empty string when unset.
SCAVIO_KEY = os.environ.get('SCAVIO_API_KEY', '')


def search_scavio(query):
    """Run one search against the Scavio endpoint and normalise the results.

    Args:
        query: Search query string.

    Returns:
        A list of dicts with ``title``, ``link`` and ``snippet`` keys, one per
        entry of the response's ``organic_results``; missing fields become ''.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 10},
        # Without a timeout, requests can block indefinitely on a stalled server.
        timeout=30,
    )
    # Surface HTTP errors instead of scoring an error payload as "no results".
    resp.raise_for_status()
    return [
        {
            'title': r.get('title', ''),
            'link': r.get('link', ''),
            'snippet': r.get('snippet', ''),
        }
        for r in (resp.json().get('organic_results') or [])
    ]
# Price in USD of a single search request, keyed by provider name.
PRICES = {'scavio': 0.005}


def run_benchmark(search_fns=None):
    """Benchmark every provider in PRICES and print a ranked comparison table.

    Each benchmark query is sent to the provider's search function, the
    results are scored with ``score_results``, and the per-query totals are
    averaged. Quality-per-dollar is the average quality divided by the price
    of 1,000 requests (or the raw average when the price is not positive).

    Args:
        search_fns: Optional mapping of provider name to a callable taking a
            query string and returning a result list. Defaults to
            ``{'scavio': search_scavio}``. Providers listed in PRICES without
            an entry here are skipped with a notice instead of being scored
            with another provider's results.

    Returns:
        Dict mapping provider name to ``avg_quality``, ``price_per_1k`` and
        ``quality_per_dollar``.
    """
    if search_fns is None:
        search_fns = {'scavio': search_scavio}

    results = {}
    for provider, price in PRICES.items():
        search_fn = search_fns.get(provider)
        if search_fn is None:
            print(f'Skipping {provider}: no search function registered')
            continue

        scores = []
        for bq in benchmark_queries:
            search_results = search_fn(bq['query'])
            breakdown = score_results(search_results, bq['expected_domains'], bq['expected_terms'])
            scores.append(breakdown['total'])
            time.sleep(0.3)  # pause between consecutive requests

        if not scores:
            # No benchmark queries defined: nothing to average.
            print(f'Skipping {provider}: no benchmark queries were scored')
            continue

        avg_score = sum(scores) / len(scores)
        price_per_1k = price * 1000
        # Quality points per dollar of cost per 1,000 requests.
        qpd = avg_score / price_per_1k if price > 0 else avg_score
        results[provider] = {
            'avg_quality': round(avg_score, 1),
            'price_per_1k': price_per_1k,
            'quality_per_dollar': round(qpd, 1),
        }

    print(f'{"Provider":<15} {"Quality":>8} {"$/1K":>8} {"Q/$":>8}')
    print('-' * 42)
    # Best value first.
    for name, r in sorted(results.items(), key=lambda x: -x[1]['quality_per_dollar']):
        print(f'{name:<15} {r["avg_quality"]:>7.1f} {r["price_per_1k"]:>7.2f} {r["quality_per_dollar"]:>7.1f}')
    return results
run_benchmark()
Python Example
import requests, os, time
# Required setting: the lookup raises KeyError if SCAVIO_API_KEY is not set.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']


def search(query):
    """Return the ``organic_results`` list of a Scavio search for ``query``.

    Returns an empty list when the response has no ``organic_results``.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 10},
        # Without a timeout, requests can block indefinitely on a stalled server.
        timeout=30,
    )
    # Fail loudly on HTTP errors rather than reporting them as zero quality.
    resp.raise_for_status()
    return resp.json().get('organic_results') or []
def score(results, terms):
    """Return the percentage (0-100) of ``terms`` found in the result snippets.

    Matching is a case-insensitive substring test against all snippets joined
    together. Results whose ``snippet`` is missing or ``None`` contribute no
    text. Returns 0 when ``terms`` is empty.
    """
    if not terms:
        return 0
    text = ' '.join(r.get('snippet') or '' for r in results).lower()
    hits = sum(1 for t in terms if t.lower() in text)
    return hits / len(terms) * 100
# (query, expected terms) pairs for a quick quality check.
queries = [
    ('python asyncio', ['asyncio', 'await', 'coroutine']),
    ('react hooks', ['useState', 'useEffect', 'hook']),
]
for q, terms in queries:
    results = search(q)
    s = score(results, terms)
    print(f'{q}: quality={s:.0f}/100, cost=$0.005')
JavaScript Example
// API key read from the environment; undefined when SCAVIO_API_KEY is not set.
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;

/**
 * Run one Scavio search and return its organic results.
 *
 * @param {string} query - Search query string.
 * @returns {Promise<Array<object>>} The response's `organic_results`, or an
 *   empty array when that field is absent.
 * @throws {Error} When the HTTP response status is not in the 2xx range.
 */
async function search(query) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: 10 })
  });
  // fetch() resolves on HTTP errors too; fail loudly so an error response is
  // not silently scored as an empty result set.
  if (!resp.ok) {
    throw new Error(`Search request failed: ${resp.status} ${resp.statusText}`);
  }
  const data = await resp.json();
  return data.organic_results || [];
}
/**
 * Return the percentage (0-100) of `terms` found in the result snippets.
 *
 * Matching is a case-insensitive substring test against all snippets joined
 * together; results without a snippet contribute no text.
 *
 * @param {Array<object>} results - Search results; only `snippet` is read.
 * @param {string[]} terms - Terms expected to appear in the snippets.
 * @returns {number} Share of matched terms as a percentage; 0 when `terms`
 *   is empty.
 */
function score(results, terms) {
  // Guard against 0 / 0, which would otherwise yield NaN.
  if (terms.length === 0) {
    return 0;
  }
  const text = results.map(r => r.snippet || '').join(' ').toLowerCase();
  const hits = terms.filter(t => text.includes(t.toLowerCase())).length;
  return (hits / terms.length) * 100;
}
// Top-level await: this only runs where `await` is allowed outside a function
// (e.g. an ES module); otherwise wrap the call in an async function.
const results = await search('python asyncio tutorial');
console.log(`Quality: ${score(results, ['asyncio', 'await', 'coroutine']).toFixed(0)}/100`);
Expected Output
5 benchmark queries defined
Quality scorer ready
Provider Quality $/1K Q/$
------------------------------------------
scavio 78.4 5.00 15.7
Interpretation: 15.7 quality points per dollar spent
Higher Q/$ = better value for money