Search grounding gives AI agents access to real-time web data so they stop answering from stale training data. The pattern works with any Python agent framework: before the LLM generates an answer, query a search API for relevant results and inject them into the prompt context. This tutorial shows a framework-agnostic approach using the Scavio API that works whether you use OpenAI, Anthropic, LangChain, or a bare Python loop. At $0.005 per search, grounding adds less than a penny per agent turn.
Prerequisites
- Python 3.9+ installed
- requests library installed
- A Scavio API key from scavio.dev
- Any LLM API key (OpenAI, Anthropic, etc.)
Walkthrough
Step 1: Create the search grounding function
Build a reusable function that takes a query and returns formatted context. This function becomes the bridge between your agent and live web data.
import requests, os
API_KEY = os.environ['SCAVIO_API_KEY']
def ground_with_search(query: str, max_results: int = 5) -> str:
    resp = requests.post('https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us'})
    resp.raise_for_status()
    results = resp.json().get('organic_results', [])[:max_results]
    context = '\n'.join(
        f'[{r["position"]}] {r["title"]}\n {r.get("snippet", "")}\n Source: {r["link"]}'
        for r in results
    )
    return f'Search results for: {query}\n\n{context}'
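Before wiring this into an agent, it helps to call the function directly and eyeball the formatted context; the query and max_results value below are just an illustration.
# Quick manual check of the grounding function; any query works here
context = ground_with_search('latest stable Python release', max_results=3)
print(context)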
Step 2: Inject grounding into any prompt template
Wrap your existing prompt with search context. The grounding data goes before the user question so the LLM can reference it.
def build_grounded_prompt(user_question: str) -> str:
    search_context = ground_with_search(user_question)
    return f"""Use the following search results to answer accurately.
Do not make up information not present in the results.
{search_context}
Question: {user_question}
Answer:"""
prompt = build_grounded_prompt('What is the latest Python version in 2026?')
print(prompt[:500])
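If the search API is unreachable you may prefer a degraded answer over a crashed turn. One way to do that is a thin wrapper that falls back to the ungrounded question; this is a sketch rather than part of the tutorial's API, and the fallback wording is only an example.
def build_grounded_prompt_safe(user_question: str) -> str:
    # Fall back to the bare question if the search request fails
    try:
        return build_grounded_prompt(user_question)
    except requests.RequestException:
        return f'Question: {user_question}\nAnswer:'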
Step 3: Use with OpenAI / Anthropic directly
Pass the grounded prompt to any LLM API. This example shows both OpenAI and Anthropic patterns.
# With OpenAI:
from openai import OpenAI
client = OpenAI()
def ask_grounded_openai(question: str) -> str:
    prompt = build_grounded_prompt(question)
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{'role': 'user', 'content': prompt}],
        max_tokens=500
    )
    return response.choices[0].message.content
# With Anthropic:
import anthropic
client_a = anthropic.Anthropic()
def ask_grounded_anthropic(question: str) -> str:
    prompt = build_grounded_prompt(question)
    msg = client_a.messages.create(
        model='claude-sonnet-4-20250514',
        max_tokens=500,
        messages=[{'role': 'user', 'content': prompt}]
    )
    return msg.content[0].text
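Either helper can then be called with a plain question. The snippet below assumes the usual OPENAI_API_KEY and ANTHROPIC_API_KEY environment variables are set, since both client constructors read them by default.
# Each call performs one Scavio search plus one LLM request
question = 'What is the latest Python version in 2026?'
print(ask_grounded_openai(question))
print(ask_grounded_anthropic(question))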
Step 4: Use as a LangChain tool
If you use LangChain, wrap the search function as a tool so the agent can call it autonomously when it needs current data.
from langchain.tools import tool
@tool
def web_search(query: str) -> str:
"""Search the web for current information. Use this when you need
up-to-date facts, prices, or recent events."""
return ground_with_search(query)
# Register with any LangChain agent:
# agent = create_react_agent(llm, tools=[web_search], ...)
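The commented registration line can be expanded into a working agent loop. Here is a minimal sketch assuming the langgraph prebuilt ReAct agent and the langchain-openai chat model are installed; import paths and signatures vary between LangChain releases, so adjust to the version you use.
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
# The agent decides on its own when to call web_search
llm = ChatOpenAI(model='gpt-4o')
agent = create_react_agent(llm, tools=[web_search])
result = agent.invoke({'messages': [('user', 'What is the latest Python version in 2026?')]})
print(result['messages'][-1].content)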
Step 5: Add caching to reduce duplicate searches
In multi-turn conversations, the agent may search for the same thing twice. A simple TTL cache prevents duplicate API calls.
import hashlib
import time
_cache = {}
def cached_ground(query: str, ttl_seconds: int = 300) -> str:
    key = hashlib.md5(query.encode()).hexdigest()
    if key in _cache:
        result, timestamp = _cache[key]
        if time.time() - timestamp < ttl_seconds:
            return result
    result = ground_with_search(query)
    _cache[key] = (result, time.time())
    return result
# First call hits API, second returns cached result
ctx1 = cached_ground('python 3.14 release date')
ctx2 = cached_ground('python 3.14 release date') # cached, no API call
print(f'Cache size: {len(_cache)} entries')
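The dictionary grows for the lifetime of the process, so a long-running agent may want to evict stale entries now and then. A small helper like this (not part of the tutorial's API, just a sketch reusing the same TTL) keeps it bounded.
def evict_expired(ttl_seconds: int = 300) -> None:
    # Remove cache entries whose timestamp is older than the TTL
    now = time.time()
    for key in [k for k, (_, ts) in _cache.items() if now - ts >= ttl_seconds]:
        del _cache[key]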
Python Example
import os, requests
API_KEY = os.environ['SCAVIO_API_KEY']
def ground_with_search(query: str, max_results: int = 5) -> str:
    resp = requests.post('https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us'})
    resp.raise_for_status()
    results = resp.json().get('organic_results', [])[:max_results]
    return '\n'.join(
        f'[{r["position"]}] {r["title"]}\n {r.get("snippet", "")}\n Source: {r["link"]}'
        for r in results
    )
def build_grounded_prompt(question: str) -> str:
    ctx = ground_with_search(question)
    return f'Use these search results to answer accurately:\n\n{ctx}\n\nQuestion: {question}\nAnswer:'
def main():
    question = 'What is the latest Python version in 2026?'
    prompt = build_grounded_prompt(question)
    print(prompt)
    print(f'\nGrounding cost: $0.005 per search')
if __name__ == '__main__':
    main()
JavaScript Example
const API_KEY = process.env.SCAVIO_API_KEY;
async function groundWithSearch(query, maxResults = 5) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': API_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us' })
  });
  const data = await resp.json();
  return (data.organic_results || []).slice(0, maxResults)
    .map(r => `[${r.position}] ${r.title}\n  ${r.snippet || ''}\n  Source: ${r.link}`)
    .join('\n');
}
async function main() {
  const question = 'What is the latest Python version in 2026?';
  const context = await groundWithSearch(question);
  const prompt = `Use these results to answer:\n\n${context}\n\nQ: ${question}\nA:`;
  console.log(prompt);
  console.log('\nGrounding cost: $0.005 per search');
}
main().catch(console.error);
Expected Output
Search results for: What is the latest Python version in 2026?
[1] Python Release Python 3.14.0
    Python 3.14.0 was released on April 15, 2026 with experimental JIT...
    Source: https://www.python.org/downloads/release/python-3140/
[2] What's New In Python 3.14
    This article explains the new features in Python 3.14...
    Source: https://docs.python.org/3/whatsnew/3.14.html

Grounding cost: $0.005 per search