Agents that fetch raw web pages consume thousands of tokens parsing HTML to find a few facts. Structured search APIs return only the relevant data (title, snippet, URL) in 150-300 tokens per query. This tutorial shows how to replace page-fetching with structured search to cut token usage per search by 80-95%.
Prerequisites
- An existing agent that fetches web pages for information
- Python 3.8+ or Node.js 18+
- A Scavio API key
Walkthrough
Step 1: Measure current token usage
Calculate how many tokens your current web-fetching approach uses per search.
import tiktoken
# Rough per-search token footprint of each approach (illustrative averages).
raw_page_tokens = 5000  # Raw page fetch (requests + BeautifulSoup or Fetch MCP), after HTML stripping
useful_tokens = 200  # What the LLM actually needs from that page
structured_tokens = 250  # Average Scavio response (5 results)

# Share of the fetched page that is paid for but never used by the model.
wasted_tokens = raw_page_tokens - useful_tokens
waste_ratio = wasted_tokens / raw_page_tokens
print(f'Current waste: {waste_ratio:.0%} of tokens are unused context')
# Output: Current waste: 96% of tokens are unused context

# The same search served by the structured search API:
print(f'Structured approach: {structured_tokens} tokens per search')
print(f'Savings: {(raw_page_tokens - structured_tokens) / raw_page_tokens:.0%}')
# Output: Savings: 95%
Step 2: Replace fetch-and-parse with search
Replace web page fetching with a structured search API call.
import requests, os
# Headers sent with every Scavio request. The API key is read from the
# SCAVIO_API_KEY environment variable when this line runs; os.environ[...]
# raises KeyError if the variable is not set.
H = {'x-api-key': os.environ['SCAVIO_API_KEY'], 'Content-Type': 'application/json'}
# BEFORE: Fetch full page and extract info (5000+ tokens)
# page = requests.get(url).text
# soup = BeautifulSoup(page, 'html.parser')
# text = soup.get_text()[:3000] # Still 1000+ tokens
# AFTER: Get structured results (250 tokens)
def efficient_search(query: str) -> str:
resp = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
json={'platform': 'google', 'query': query}, timeout=10)
results = resp.json().get('organic', [])[:5]
# Return only what the LLM needs
return '\n'.join(f"{r['title']}: {r.get('snippet','')}" for r in results)Step 3: Set token budgets per tool call
Configure your agent to enforce token limits on search results.
MAX_SEARCH_TOKENS = 500 # Hard limit per search tool call
def budget_search(query: str, max_tokens: int = MAX_SEARCH_TOKENS) -> str:
resp = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
json={'platform': 'google', 'query': query}, timeout=10)
results = resp.json().get('organic', [])[:5]
output_lines = []
token_count = 0
enc = tiktoken.encoding_for_model('gpt-4')
for r in results:
line = f"{r['title']}: {r.get('snippet','')}"
line_tokens = len(enc.encode(line))
if token_count + line_tokens > max_tokens:
break
output_lines.append(line)
token_count += line_tokens
return '\n'.join(output_lines)Step 4: Calculate cost savings
Compare the token cost difference between approaches.
# Cost comparison (Claude Sonnet 4.6 pricing):
input_cost_per_1m = 3.0  # $3/M input tokens

# Inputs of the comparison; every figure below is derived from these, so
# changing the workload here updates all printed numbers consistently.
searches_per_day = 100
raw_tokens_per_search = 5000  # fetch-and-parse approach
structured_tokens_per_search = 250  # structured search approach
scavio_cost_per_search = 0.005  # $ per Scavio search

# Old approach: 5000 tokens/search * 100 searches/day = 500K tokens/day
old_daily_tokens = raw_tokens_per_search * searches_per_day
old_daily_cost = (old_daily_tokens / 1_000_000) * input_cost_per_1m
print(f'Old approach: ${old_daily_cost:.2f}/day ({old_daily_tokens} tokens)')

# New approach: 250 tokens/search * 100 searches/day = 25K tokens/day
new_daily_tokens = structured_tokens_per_search * searches_per_day
new_daily_cost = (new_daily_tokens / 1_000_000) * input_cost_per_1m
print(f'New approach: ${new_daily_cost:.4f}/day ({new_daily_tokens} tokens)')

# Plus Scavio API cost: 100 searches * $0.005 = $0.50/day
scavio_cost = searches_per_day * scavio_cost_per_search
print(f'Scavio API cost: ${scavio_cost:.2f}/day')
print(f'Total new: ${new_daily_cost + scavio_cost:.2f}/day')
print(f'Savings: ${old_daily_cost - new_daily_cost - scavio_cost:.2f}/day')
Python Example
import requests, os
# Headers sent with every Scavio request. The API key is read from the
# SCAVIO_API_KEY environment variable when this line runs; os.environ[...]
# raises KeyError if the variable is not set.
H = {'x-api-key': os.environ['SCAVIO_API_KEY'], 'Content-Type': 'application/json'}
def efficient_search(query, max_results=3):
r = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
json={'platform': 'google', 'query': query}).json()
return '\n'.join(f"{x['title']}: {x.get('snippet','')}" for x in r.get('organic',[])[:max_results])JavaScript Example
async function efficientSearch(query, maxResults = 3) {
const r = await fetch('https://api.scavio.dev/api/v1/search', {
method: 'POST', headers: {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'},
body: JSON.stringify({platform: 'google', query})
});
return (await r.json()).organic?.slice(0, maxResults)
.map(x => `${x.title}: ${x.snippet}`).join('\n');
}Expected Output
An agent that uses 80-95% fewer tokens per search by getting structured results instead of fetching full web pages.