Local LLM research stacks (Ollama, LMStudio, LocalAI) provide privacy and zero per-token cost but lack access to current web data. Adding a search API as a tool gives your local model live grounding without sending your prompts to cloud LLMs. One HTTP call returns structured results your local model can cite.
Prerequisites
- Ollama or LMStudio running locally
- Python 3.8+
- A Scavio API key (free tier: 500 searches/month)
Walkthrough
Step 1: Create the search tool function
Build a simple function that your local LLM can call for web search.
import requests, os
SCAVIO_KEY = os.environ.get('SCAVIO_API_KEY', 'your_key_here')
def web_search(query: str, platform: str = 'google') -> str:
"""Search the web and return structured results for the local LLM."""
resp = requests.post('https://api.scavio.dev/api/v1/search',
headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
json={'platform': platform, 'query': query}, timeout=10)
results = resp.json().get('organic', [])[:5]
# Format for local LLM context (plain text, token-efficient)
lines = []
for r in results:
lines.append(f"- {r.get('title','')}: {r.get('snippet','')} ({r.get('link','')})")
return '\n'.join(lines) if lines else 'No results found.'Step 2: Integrate with Ollama
Use a lightweight prompt convention (the model replies with `SEARCH: <query>`) so the local model can request a web search when it needs one. (Ollama also supports native tool calling for models that implement it; the prompt approach below works with any model.)
import ollama
def research_with_search(question: str) -> str:
# First, ask the model if it needs search
response = ollama.chat(model='llama3.2', messages=[
{'role': 'system', 'content': 'You are a research assistant. If you need current information, say SEARCH: <query>. Otherwise answer directly.'},
{'role': 'user', 'content': question}
])
answer = response['message']['content']
# If model requests search, fetch and re-prompt
if 'SEARCH:' in answer:
query = answer.split('SEARCH:')[1].strip()
search_results = web_search(query)
response = ollama.chat(model='llama3.2', messages=[
{'role': 'system', 'content': 'Answer using the search results below.'},
{'role': 'user', 'content': f'Question: {question}\n\nSearch results:\n{search_results}'}
])
return response['message']['content']
return answer
print(research_with_search('What is the current version of Python?'))Step 3: Add multi-platform research
Extend to search Reddit for opinions and YouTube for tutorials.
def deep_research(topic: str) -> dict:
google = web_search(topic, 'google')
reddit = web_search(topic, 'reddit')
youtube = web_search(topic, 'youtube')
context = f"""Research on: {topic}
Google results:
{google}
Reddit discussions:
{reddit}
YouTube videos:
{youtube}"""
response = ollama.chat(model='llama3.2', messages=[
{'role': 'system', 'content': 'Synthesize a research brief from the sources below. Cite sources.'},
{'role': 'user', 'content': context}
])
return {'topic': topic, 'brief': response['message']['content'], 'credits_used': 3}Python Example
import requests, os, ollama
def search(q, platform='google'):
r = requests.post('https://api.scavio.dev/api/v1/search',
headers={'x-api-key': os.environ['SCAVIO_API_KEY'], 'Content-Type': 'application/json'},
json={'platform': platform, 'query': q}).json()
return '\n'.join(f"- {x['title']}: {x.get('snippet','')}" for x in r.get('organic',[])[:5])
def research(q):
ctx = search(q)
return ollama.chat(model='llama3.2', messages=[{'role':'user','content':f'{q}\n\nContext:\n{ctx}'}])['message']['content']JavaScript Example
async function search(query, platform = 'google') {
const r = await fetch('https://api.scavio.dev/api/v1/search', {
method: 'POST', headers: {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'},
body: JSON.stringify({platform, query})
});
return (await r.json()).organic?.slice(0,5).map(x => `- ${x.title}: ${x.snippet}`).join('\n');
}Expected Output
A local LLM research stack that can search the live web for current information, combining Ollama's privacy with Scavio's structured search data.