Scale a job search agent from hundreds to 100K users by adding a result cache layer, per-user rate limiting, credit budget allocation, batch query processing, and cost monitoring. At 100K users each running 5 searches/day, you need 500K daily API calls. Without optimization, that costs $2,500/day at $0.005/credit. With caching (60% hit rate), batching, and deduplication, actual API calls drop to roughly 150K/day ($750/day). This tutorial implements each optimization layer.
Prerequisites
- Python 3.8+ installed
- requests and redis libraries installed
- Redis running locally or a managed instance
- A Scavio API key from scavio.dev
Walkthrough
Step 1: Add caching layer
Cache search results by query to avoid duplicate API calls. With 100K users, many search the same job titles and locations.
# Shared setup: reading the key with os.environ[...] raises KeyError at
# import time if SCAVIO_API_KEY is unset, failing fast instead of at request time.
import os, requests, hashlib, json, time
API_KEY = os.environ['SCAVIO_API_KEY']
# In-memory cache (use Redis in production)
# Maps md5(normalized query) -> {'data': [...], 'ts': epoch seconds, 'hits': int}
cache = {}
CACHE_TTL = 3600 # 1 hour for job listings
def cached_search(query: str) -> list:
    """Return organic search results for *query*, serving repeats from cache.

    The cache key is the MD5 of the normalized (lowercased, stripped) query,
    so trivially different spellings share one entry.  Entries expire after
    CACHE_TTL seconds.

    Fix: the original parsed error responses too, so a 4xx/5xx reply was
    cached as an empty result list for a full hour (cache poisoning).
    HTTP errors now raise requests.HTTPError instead of being cached.
    """
    key = hashlib.md5(query.lower().strip().encode()).hexdigest()
    now = time.time()
    entry = cache.get(key)  # single lookup instead of `key in cache` + index
    if entry is not None and now - entry['ts'] < CACHE_TTL:
        entry['hits'] += 1
        return entry['data']
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY},
        json={'platform': 'google', 'query': query},
        timeout=10,
    )
    # Fail loudly rather than caching an error payload's (missing) results.
    resp.raise_for_status()
    results = resp.json().get('organic_results', [])
    cache[key] = {'data': results, 'ts': now, 'hits': 0}
    return results
# Test cache hit rate
for _ in range(5):
cached_search('software engineer jobs san francisco 2026')
total_hits = sum(v['hits'] for v in cache.values())
print(f'Cache entries: {len(cache)}, Total hits: {total_hits}')Step 2: Rate limit per user
Enforce per-user rate limits to prevent abuse and ensure fair resource distribution across all 100K users.
from collections import defaultdict
# Fixed-window rate-limit state per user id:
# 'count' searches since 'window_start' (epoch seconds; 0 = never searched,
# which forces a window reset on the user's first call).
user_usage = defaultdict(lambda: {'count': 0, 'window_start': 0})
MAX_QUERIES_PER_HOUR = 20
def rate_limited_search(user_id: str, query: str) -> dict:
    """Search on behalf of *user_id*, enforcing a fixed hourly quota.

    Each user gets MAX_QUERIES_PER_HOUR searches per fixed 3600-second
    window.  Over-quota calls return an error dict with a retry hint and
    never reach the search layer.
    """
    now = time.time()
    usage = user_usage[user_id]
    # Roll the window forward once it is more than an hour old.
    if now - usage['window_start'] > 3600:
        usage.update(count=0, window_start=now)
    if usage['count'] >= MAX_QUERIES_PER_HOUR:
        wait = int(3600 - (now - usage['window_start']))
        return {'error': 'rate_limited', 'retry_after': wait}
    usage['count'] += 1
    return {
        'results': cached_search(query),
        'remaining': MAX_QUERIES_PER_HOUR - usage['count'],
    }
# Test
result = rate_limited_search('user_123', 'data analyst jobs NYC')
print(f"Results: {len(result.get('results', []))}, Remaining: {result.get('remaining', 0)}")Step 3: Budget credits by tier
Allocate monthly API credit budgets per user tier: free users get limited searches, paid users get more.
# Monthly search allowances per subscription tier.
TIER_BUDGETS = {
'free': 50, # 50 searches/month
'basic': 500, # 500 searches/month
'pro': 5000, # 5000 searches/month
}
# Per-user budget state; unseen users default to the free tier with 0 used.
user_budgets = defaultdict(lambda: {'used': 0, 'tier': 'free'})
def budgeted_search(user_id: str, query: str) -> dict:
    """Search for *user_id*, charging one credit unless served from cache.

    Limits come from TIER_BUDGETS (unknown tiers fall back to the free
    allowance of 50).  A live cache entry is returned without touching the
    budget, so popular repeat queries stay free for every user.
    """
    budget = user_budgets[user_id]
    tier = budget['tier']
    limit = TIER_BUDGETS.get(tier, 50)
    if budget['used'] >= limit:
        return {'error': 'budget_exceeded', 'tier': tier, 'limit': limit}
    # Check cache first (free, does not count against budget)
    key = hashlib.md5(query.lower().strip().encode()).hexdigest()
    entry = cache.get(key)
    if entry is not None and time.time() - entry['ts'] < CACHE_TTL:
        return {'results': entry['data'], 'budget_used': budget['used'], 'from_cache': True}
    budget['used'] += 1
    return {
        'results': cached_search(query),
        'budget_used': budget['used'],
        'budget_limit': limit,
    }
user_budgets['user_456']['tier'] = 'pro'
result = budgeted_search('user_456', 'product manager jobs remote')
print(f"Budget used: {result.get('budget_used')}/{TIER_BUDGETS['pro']}")Step 4: Batch queries for efficiency
Process multiple job searches in a single batch to reduce overhead and enable deduplication across users.
from concurrent.futures import ThreadPoolExecutor
def batch_search(queries: list, max_workers: int = 10) -> list:
    """Resolve many queries at once, deduplicating across callers.

    Queries are normalized (lowercased/stripped) and deduplicated so that
    identical searches from different users cost one API call.  Live cache
    entries are answered directly; only the cold remainder goes to the API,
    fanned out across a thread pool.  Returns one result list per input
    query, in the original order.
    """
    unique = list({q.lower().strip() for q in queries})
    print(f'Batch: {len(queries)} queries, {len(unique)} unique')
    results = {}
    uncached = []
    for q in unique:
        digest = hashlib.md5(q.encode()).hexdigest()
        entry = cache.get(digest)
        if entry is not None and time.time() - entry['ts'] < CACHE_TTL:
            results[q] = entry['data']
        else:
            uncached.append(q)
    print(f'Cache hits: {len(unique) - len(uncached)}, API calls needed: {len(uncached)}')
    if uncached:
        # Fan cold queries out concurrently; the network waits overlap.
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            for q, fetched in zip(uncached, pool.map(cached_search, uncached)):
                results[q] = fetched
    return [results.get(q.lower().strip(), []) for q in queries]
batch = ['data scientist jobs', 'ML engineer jobs', 'data scientist jobs', 'AI researcher jobs']
results = batch_search(batch)
print(f'Returned {len(results)} result sets')Step 5: Monitor costs
Track API usage, cache hit rates, and projected monthly costs to stay within budget.
def cost_report() -> dict:
total_searches = sum(v['used'] for v in user_budgets.values())
cache_hits = sum(v['hits'] for v in cache.values())
cache_entries = len(cache)
api_calls = total_searches # approximate
cache_ratio = cache_hits / max(cache_hits + api_calls, 1)
cost_per_credit = 0.005
daily_cost = api_calls * cost_per_credit
monthly_projected = daily_cost * 30
report = {
'total_searches': total_searches,
'cache_entries': cache_entries,
'cache_hit_ratio': round(cache_ratio, 2),
'api_calls_today': api_calls,
'daily_cost': f'${daily_cost:.2f}',
'monthly_projected': f'${monthly_projected:.2f}',
}
for k, v in report.items():
print(f'{k}: {v}')
return report
cost_report()Python Example
import requests, os, hashlib, time
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
cache = {}
def job_search(query):
key = hashlib.md5(query.encode()).hexdigest()
if key in cache and time.time() - cache[key]['ts'] < 3600:
return cache[key]['data']
data = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
json={'platform': 'google', 'query': query}).json()
results = data.get('organic_results', [])
cache[key] = {'data': results, 'ts': time.time()}
return results
print(f'{len(job_search("data scientist jobs NYC"))} results')JavaScript Example
const H = {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'};
const cache = new Map();
async function jobSearch(query) {
const key = query.toLowerCase();
const c = cache.get(key);
if (c && Date.now() - c.ts < 3600000) return c.data;
const r = await fetch('https://api.scavio.dev/api/v1/search', {
method: 'POST', headers: H, body: JSON.stringify({platform: 'google', query})
});
const data = (await r.json()).organic_results || [];
cache.set(key, {data, ts: Date.now()});
return data;
}
jobSearch('data scientist jobs NYC').then(r => console.log(r.length + ' results'));Expected Output
A job search agent architecture that handles 100K users with caching, rate limiting, credit budgeting, batch processing, and cost monitoring dashboards.