AI agents that search on every turn burn through credits fast. A research agent running 50 searches per session at $0.005 each costs $0.25 per conversation. This tutorial implements four optimizations: result caching, query deduplication, platform-aware routing, and hard budget caps. Together they cut costs 60-80% without degrading output.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
- An existing agent that uses search
Walkthrough
Step 1: Add a TTL cache for search results
Cache results for a configurable window. Most SERP data stays valid for 1-24 hours.
import os, requests, hashlib, time
# Read the Scavio API key from the environment; a missing variable raises
# KeyError as soon as this line runs.
API_KEY = os.environ['SCAVIO_API_KEY']
# Headers sent with every search request: the API key plus a JSON content type.
H = {'x-api-key': API_KEY, 'Content-Type': 'application/json'}
class SearchCache:
    """In-memory cache of search results with a fixed time-to-live.

    Entries are keyed by an MD5 digest of ``"<query>:<platform>"`` and are
    stamped with ``time.time()`` when stored. Hit and miss counters are kept
    so the cache's effectiveness can be reported with :meth:`stats`.
    """

    def __init__(self, ttl=3600):
        """Create an empty cache whose entries stay fresh for *ttl* seconds."""
        self.cache = {}
        self.ttl = ttl
        self.hits = self.misses = 0

    def _key(self, query, platform):
        """Return the hex digest used as the cache key for a query/platform pair."""
        # MD5 serves only as a compact fingerprint here, not as a security measure.
        return hashlib.md5(f'{query}:{platform}'.encode()).hexdigest()

    def get(self, query, platform='google'):
        """Return the cached data for the pair, or ``None`` on a miss.

        An entry older than ``ttl`` seconds counts as a miss and is removed,
        so expired results do not pile up in memory for the rest of the
        session.
        """
        key = self._key(query, platform)
        entry = self.cache.get(key)
        if entry is not None:
            if time.time() - entry['ts'] < self.ttl:
                self.hits += 1
                return entry['data']
            # Stale entry: evict it instead of keeping it around forever.
            del self.cache[key]
        self.misses += 1
        return None

    def set(self, query, platform, data):
        """Store *data* for the query/platform pair, stamped with the current time."""
        self.cache[self._key(query, platform)] = {'data': data, 'ts': time.time()}

    def stats(self):
        """Return a summary string: hit count, miss count and hit rate in percent."""
        total = self.hits + self.misses
        # Guard against division by zero before any lookup has happened.
        rate = self.hits / total * 100 if total else 0
        return f'{self.hits} hits, {self.misses} misses ({rate:.0f}%)'


# Step 2: Implement query deduplication
Normalize queries to catch near-duplicates before they hit the API.
import re
def normalize_query(query):
    """Return a canonical form of *query* used to detect near-duplicates.

    The query is lower-cased, every character other than ``a-z``, ``0-9``
    and whitespace is dropped, filler words are removed, and the remaining
    words are sorted. Queries that differ only in case, punctuation, word
    order or filler words therefore map to the same string.
    """
    q = re.sub(r'[^a-z0-9\s]', '', query.lower().strip())
    # 'for' is a filler word too: without it "web scraping API for beginners"
    # and "web scraping api beginners" would not be recognised as duplicates.
    stop = {'the', 'a', 'an', 'is', 'are', 'what', 'how', 'do', 'does', 'for'}
    return ' '.join(sorted(w for w in q.split() if w not in stop))
class SmartSearch:
    """Search client that caches results, deduplicates queries and routes them.

    Each query is first looked up in the cache, both under the raw text of
    an earlier query with the same normalized form and under its own raw
    text. Only when both lookups miss is the Scavio search endpoint called.
    ``api_calls`` counts the requests that were actually sent and answered.
    """

    # Seconds to wait for the API before giving up; without a timeout a
    # stalled connection would block the agent indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Start with an empty cache, an empty dedup map and zero API calls."""
        self.cache = SearchCache()
        # normalized query -> raw text of the latest query fetched for it
        self.norm_map = {}
        self.api_calls = 0

    def search(self, query, platform=None):
        """Return search results for *query*, served from cache when possible.

        Args:
            query: The raw search string.
            platform: Target platform; when ``None`` it is chosen by
                :meth:`_route` from keywords in the query.

        Returns:
            The decoded JSON body of the API response, or the cached copy of
            an earlier successful response.
        """
        if platform is None:
            platform = self._route(query)
        norm = normalize_query(query)
        # A query made only of stop words or punctuation normalizes to '';
        # using that as a dedup key would make unrelated queries collide.
        known = self.norm_map.get(norm) if norm else None
        # Look up the earlier equivalent query first, then this exact text.
        # When both are the same string, check it only once so a single
        # lookup is not counted as two misses.
        candidates = [query] if known is None or known == query else [known, query]
        for candidate in candidates:
            cached = self.cache.get(candidate, platform)
            # Compare against None: an empty payload is still a valid hit.
            if cached is not None:
                return cached
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers=H,
            json={'query': query, 'platform': platform, 'country_code': 'us'},
            timeout=self.REQUEST_TIMEOUT,
        )
        data = resp.json()
        self.api_calls += 1
        # Only successful responses are cached; an error body must not be
        # replayed to the agent for the whole TTL window.
        if resp.ok:
            self.cache.set(query, platform, data)
            if norm:
                self.norm_map[norm] = query
        return data

    def _route(self, query):
        """Pick a platform from keywords found in *query*.

        Matching is a plain substring test on the lower-cased query, checked
        in the order amazon, reddit, youtube; anything else goes to google.
        """
        q = query.lower()
        if any(w in q for w in ['price', 'buy', 'amazon', 'product']):
            return 'amazon'
        if any(w in q for w in ['reddit', 'opinion', 'recommend']):
            return 'reddit'
        if any(w in q for w in ['video', 'youtube', 'tutorial']):
            return 'youtube'
        return 'google'


# Step 3: Add budget caps
Prevent agents from exceeding a cost threshold per session.
class BudgetSearch(SmartSearch):
    """SmartSearch with a hard spending cap per session.

    Spend is estimated as ``api_calls * cost_per_call``. Once it reaches
    ``max_budget`` no further API requests are made; queries that can be
    answered from the cache are still served, because they cost nothing.
    """

    def __init__(self, max_budget=0.50, cost_per_call=0.005):
        """Set the cap.

        Args:
            max_budget: Maximum estimated spend, in dollars.
            cost_per_call: Estimated price of one API request, in dollars.
        """
        super().__init__()
        self.max_budget = max_budget
        self.cost_per_call = cost_per_call

    def _spent(self):
        """Return the estimated spend so far, in dollars."""
        return self.api_calls * self.cost_per_call

    def _cached_only(self, query, platform):
        """Return a cached result for *query* without calling the API, or None."""
        if platform is None:
            platform = self._route(query)
        norm = normalize_query(query)
        # Skip the dedup map for an empty normalized form (see search docs:
        # unrelated stop-word-only queries would otherwise share one key).
        known = self.norm_map.get(norm) if norm else None
        candidates = [query] if known is None or known == query else [known, query]
        for candidate in candidates:
            cached = self.cache.get(candidate, platform)
            if cached is not None:
                return cached
        return None

    def search(self, query, platform=None):
        """Search like SmartSearch, but never spend beyond the budget.

        Returns:
            The search results, or ``{'error': 'Budget exceeded',
            'organic_results': []}`` when the budget is used up and the
            query is not in the cache.
        """
        # The small tolerance keeps float rounding in the multiplication
        # from letting one extra paid call slip under the cap.
        if self.max_budget - self._spent() <= 1e-9:
            cached = self._cached_only(query, platform)
            if cached is not None:
                return cached
            return {'error': 'Budget exceeded', 'organic_results': []}
        return super().search(query, platform)

    def stats(self):
        """Print the API call count, estimated spend versus budget, and cache stats."""
        cost = self._spent()
        print(f'API calls: {self.api_calls} (${cost:.3f} / ${self.max_budget:.3f} budget)')
        print(f'Cache: {self.cache.stats()}')


# Step 4: Test savings with simulated agent session
Run a realistic workload and measure the savings versus a naive approach that calls the API for every query.
# Simulated agent session: replay a mixed workload through the budgeted
# client and compare the number of paid API calls with one call per query.
smart = BudgetSearch(max_budget=0.50)
queries = [
    'best SERP API pricing 2026',
    'SERP API pricing comparison 2026',  # different wording -> separate call
    'best serp api pricing 2026',        # case duplicate
    'amazon price tracking API',         # -> amazon
    'reddit opinions on tavily',         # -> reddit
    'youtube tutorial web scraping',     # -> youtube
    'SERP API pricing comparison 2026',  # exact repeat -> cached
    'web scraping API for beginners',
    'web scraping api beginners',        # near-duplicate
]
for q in queries:
    calls_before = smart.api_calls
    data = smart.search(q)
    # Results live under 'organic_results', or under 'products' when absent.
    n = len(data.get('organic_results', data.get('products', [])))
    # No new API call and no error payload means the cache answered.
    from_cache = smart.api_calls == calls_before and 'error' not in data
    suffix = ' (cached)' if from_cache else ''
    print(f' "{q[:45]}" -> {n} results{suffix}')
print()
smart.stats()
naive = len(queries) * 0.005
actual = smart.api_calls * 0.005
print(f'Naive: ${naive:.3f}, Actual: ${actual:.3f}, Saved: {(1-actual/naive)*100:.0f}%')
# Python Example
import os, requests, hashlib, time
# Read the Scavio API key from the environment; a missing variable raises
# KeyError as soon as this line runs.
API_KEY = os.environ['SCAVIO_API_KEY']
# Headers sent with every search request: the API key plus a JSON content type.
H = {'x-api-key': API_KEY, 'Content-Type': 'application/json'}
# Module-level cache: MD5 key -> {'data': response JSON, 'ts': time stored}.
cache = {}
# Counters updated by search() on every lookup.
hits = misses = 0
def search(query, platform='google', ttl=3600, timeout=30):
    """Return search results for *query*, using the module-level TTL cache.

    Args:
        query: The search string; it is lower-cased for the cache key only.
        platform: Platform name sent to the API and included in the key.
        ttl: Seconds a cached entry stays valid.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        The decoded JSON body, either fresh from the API or from the cache.
    """
    global hits, misses
    key = hashlib.md5(f'{query.lower()}:{platform}'.encode()).hexdigest()
    entry = cache.get(key)
    if entry is not None and time.time() - entry['ts'] < ttl:
        hits += 1
        return entry['data']
    misses += 1
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=H,
        json={'query': query, 'platform': platform, 'country_code': 'us'},
        timeout=timeout,
    )
    data = resp.json()
    # Cache successful responses only, so an error body is not replayed
    # for the whole TTL window.
    if resp.ok:
        cache[key] = {'data': data, 'ts': time.time()}
    return data
# Three lookups that share one cache key once lower-cased: the first call
# reaches the API and the other two are answered from the cache.
demo_queries = ('serp api', 'serp api', 'SERP API')
for raw_query in demo_queries:
    search(raw_query.lower())
print(f'Hits: {hits}, Misses: {misses}, Saved: ${hits * 0.005:.3f}')
# JavaScript Example
// Scavio API key, read from the SCAVIO_API_KEY environment variable.
const API_KEY = process.env.SCAVIO_API_KEY;
// Headers sent with every search request: the API key plus a JSON content type.
const H = { 'x-api-key': API_KEY, 'Content-Type': 'application/json' };
// Cache: "<lower-cased query>:<platform>" -> { data, ts } (ts in milliseconds).
const cache = new Map();
// Counters updated by search() on every lookup.
let hits = 0, misses = 0;
/**
 * Return search results for a query, using the module-level TTL cache.
 *
 * @param {string} query - Search string; lower-cased for the cache key only.
 * @param {string} [platform='google'] - Platform sent to the API and part of the key.
 * @param {number} [ttl=3600] - Seconds a cached entry stays valid.
 * @returns {Promise<object>} Decoded JSON body, fresh from the API or cached.
 */
async function search(query, platform = 'google', ttl = 3600) {
  const key = `${query.toLowerCase()}:${platform}`;
  const c = cache.get(key);
  // Date.now() is in milliseconds, ttl in seconds.
  if (c && Date.now() - c.ts < ttl * 1000) { hits++; return c.data; }
  misses++;
  const res = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST', headers: H,
    body: JSON.stringify({ query, platform, country_code: 'us' })
  });
  const data = await res.json();
  // Cache successful responses only, so an error body is not replayed
  // for the whole TTL window.
  if (res.ok) cache.set(key, { data, ts: Date.now() });
  return data;
}
/**
 * Run three lookups that share one cache key once lower-cased, then
 * report how many were cache hits and how many were misses.
 */
async function main() {
  const queries = ['serp api', 'serp api', 'SERP API'];
  for (const query of queries) {
    await search(query);
  }
  console.log(`Hits: ${hits}, Misses: ${misses}`);
}

main().catch(console.error);
// Expected Output
"best SERP API pricing 2026" -> 5 results
"SERP API pricing comparison 2026" -> 5 results
"best serp api pricing 2026" -> 5 results (cached)
"amazon price tracking API" -> 5 results
"reddit opinions on tavily" -> 5 results
"youtube tutorial web scraping" -> 5 results
"SERP API pricing comparison 2026" -> 5 results (cached)
"web scraping API for beginners" -> 5 results
"web scraping api beginners" -> 5 results (cached)
API calls: 6 ($0.030 / $0.500 budget)
Cache: 3 hits, 6 misses (33%)
Naive: $0.045, Actual: $0.030, Saved: 33%