Ground a local LLM (Ollama, LM Studio, vLLM) with web search by routing queries through a structured SERP API instead of fetching and parsing raw pages. A structured JSON response runs roughly 600-800 tokens versus 4,000-8,000 for raw HTML, so results fit comfortably within a local model's limited context window.
Prerequisites
- Ollama or LM Studio running locally
- Scavio API key
- Python 3.8+
- A model with tool-calling support (e.g., llama3.1, mistral) if you want native function calling; the prompt-based grounding below works with any chat model
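Before wiring anything up, it helps to confirm the key is exported and the Ollama server is reachable. A quick preflight sketch, assuming Ollama's default endpoint (http://localhost:11434):
import os, requests

# Fail fast if the SERP API key is missing.
assert os.environ.get('SCAVIO_API_KEY'), 'Set SCAVIO_API_KEY first'

# Ollama lists installed models at /api/tags; a 200 means the server is up.
requests.get('http://localhost:11434/api/tags', timeout=5).raise_for_status()
print('API key set and Ollama responding.')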
Walkthrough
Step 1: Create a search function
Build a search tool that returns structured results.
import requests, os

def web_search(query, platform='google'):
    # Query the SERP API and get back structured JSON instead of raw HTML.
    resp = requests.post('https://api.scavio.dev/api/v1/search',
                         headers={'x-api-key': os.environ['SCAVIO_API_KEY'],
                                  'Content-Type': 'application/json'},
                         json={'query': query, 'country_code': 'us', 'platform': platform})
    data = resp.json()
    # Keep only the top five organic results to stay within a small context window.
    return [{'title': r.get('title', ''), 'snippet': r.get('snippet', ''),
             'url': r.get('link', '')}
            for r in data.get('organic_results', [])[:5]]

Step 2: Integrate with Ollama
Call the search function and inject its results into the model's prompt as grounding context.
import ollama

def grounded_query(question):
    # Fetch fresh results, then hand them to the model as grounding context.
    search_results = web_search(question)
    context = '\n'.join([f"- {r['title']}: {r['snippet']}" for r in search_results])
    response = ollama.chat(model='llama3', messages=[{
        'role': 'user',
        'content': f'Based on these search results:\n{context}\n\nAnswer: {question}'
    }])
    return response['message']['content']

answer = grounded_query('What is the latest Next.js version?')
print(answer)

Python Example
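The two walkthrough steps, combined into a single runnable script: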
import requests, os, ollama

def web_search(query):
    # Fetch structured results from the SERP API.
    data = requests.post('https://api.scavio.dev/api/v1/search',
                         headers={'x-api-key': os.environ['SCAVIO_API_KEY'],
                                  'Content-Type': 'application/json'},
                         json={'query': query, 'country_code': 'us'}).json()
    return [{'title': r.get('title', ''), 'snippet': r.get('snippet', '')}
            for r in data.get('organic_results', [])[:5]]

def grounded_chat(question, model='llama3'):
    results = web_search(question)
    context = '\n'.join([f'- {r["title"]}: {r["snippet"]}' for r in results])
    response = ollama.chat(model=model, messages=[{
        'role': 'system',
        'content': 'Answer based on the provided search results. Cite sources.'
    }, {
        'role': 'user',
        'content': f'Search results:\n{context}\n\nQuestion: {question}'
    }])
    return response['message']['content']

print(grounded_chat('What is the current Stripe API version?'))

JavaScript Example
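The same pattern in Node.js (18+, for built-in fetch). This version assembles the grounding context; hand it to whichever local runtime you use: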
const H = {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'};

async function webSearch(query) {
  // Fetch structured results from the SERP API.
  const data = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST', headers: H,
    body: JSON.stringify({query, country_code: 'us'})
  }).then(r => r.json());
  return (data.organic_results || []).slice(0, 5).map(r => ({
    title: r.title, snippet: r.snippet || ''
  }));
}

async function groundedChat(question) {
  const results = await webSearch(question);
  const context = results.map(r => `- ${r.title}: ${r.snippet}`).join('\n');
  // Pass context + question to your local LLM via its API
  console.log(`Grounding context (${context.length} chars) for: ${question}`);
  return context;
}
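// To complete the loop, hand the context to your local runtime. A minimal
// sketch, assuming an Ollama server on its default port (11434); the
// endpoint and payload shape will differ for LM Studio or vLLM.
async function groundedAnswer(question, model = 'llama3') {
  const context = await groundedChat(question);
  const resp = await fetch('http://localhost:11434/api/chat', {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({
      model,
      stream: false,
      messages: [
        {role: 'system', content: 'Answer based on the provided search results. Cite sources.'},
        {role: 'user', content: `Search results:\n${context}\n\nQuestion: ${question}`}
      ]
    })
  }).then(r => r.json());
  return resp.message.content;
}
// groundedAnswer('What is the current Next.js version?').then(console.log);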
groundedChat('What is the current Next.js version?');

Expected Output
Local LLM responses grounded in current web data. The model cites search results instead of relying on potentially outdated training data.
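Optional: Native Tool Calling
The walkthrough injects results into the prompt, which works with any chat model. If your model supports tool calling (see prerequisites), you can instead expose web_search as a tool and let the model decide when to search. A minimal sketch, assuming a recent ollama package, a tool-capable model such as llama3.1, and the web_search function from the Python example above:
import ollama

# Describe web_search so the model knows when and how to call it.
tools = [{
    'type': 'function',
    'function': {
        'name': 'web_search',
        'description': 'Search the web and return structured results',
        'parameters': {
            'type': 'object',
            'properties': {'query': {'type': 'string', 'description': 'The search query'}},
            'required': ['query'],
        },
    },
}]

messages = [{'role': 'user', 'content': 'What is the latest Next.js version?'}]
response = ollama.chat(model='llama3.1', messages=messages, tools=tools)

# If the model requested the tool, run each call and feed the results back.
if response['message'].get('tool_calls'):
    messages.append(response['message'])
    for call in response['message']['tool_calls']:
        results = web_search(**call['function']['arguments'])
        messages.append({'role': 'tool', 'content': str(results)})
    final = ollama.chat(model='llama3.1', messages=messages)
    print(final['message']['content'])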