Ollama is great for running LLMs locally and privately, but local models have a knowledge cutoff. Your personal assistant cannot answer questions about current events, live prices, or recent releases. Adding a search tool gives Ollama access to real-time data while keeping the LLM itself local. This tutorial connects Ollama to the Scavio API ($0.005/search) so your assistant can search Google, Reddit, YouTube, and more when it needs current information.
Prerequisites
- Ollama installed with at least one model (llama3 recommended)
- Python 3.9+ installed
- requests library installed
- A Scavio API key from scavio.dev
Walkthrough
Step 1: Build the search-enabled assistant
Create a Python assistant that wraps Ollama with a search tool. The assistant decides when to search based on the question.
import os
import re

import requests
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
OLLAMA_URL = 'http://localhost:11434/v1/chat/completions'
def search_web(query: str, num: int = 5) -> str:
    """Run a Scavio web search and return numbered results as plain text.

    Args:
        query: Search query string.
        num: Maximum number of organic results to request.

    Returns:
        One entry per result, "[n] title: snippet" plus the URL, joined
        with newlines; an empty string when there are no results.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': num},
        timeout=30,  # never let the assistant hang on a slow search backend
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error body
    results = resp.json().get('organic_results', [])
    return '\n'.join(
        f'[{i+1}] {r["title"]}: {r.get("snippet", "")}\nURL: {r["link"]}'
        for i, r in enumerate(results)
    )
def needs_search(question: str) -> bool:
    """Heuristically decide whether a question needs live web data.

    Returns True when the question contains any time-sensitive or
    comparison keyword; otherwise the local model answers alone.
    """
    trigger_words = ('latest', 'current', 'today', '2026', '2025', 'now',
                     'price', 'cost', 'version', 'release', 'news',
                     'best', 'top', 'compare', 'vs', 'weather',
                     'who won', 'what happened', 'how much')
    lowered = question.lower()
    for word in trigger_words:
        if word in lowered:
            return True
    return False
def ask_assistant(question: str) -> dict:
    """Answer a question with Ollama, searching the web first when needed.

    Returns:
        dict with 'answer' (model text), 'searched' (whether a paid web
        search ran), and 'cost' (search spend in USD for this call).
    """
    use_search = needs_search(question)
    context = ''
    if use_search:
        context = f'Web search results:\n{search_web(question)}\n\n'
    messages = [
        {'role': 'system', 'content': (
            'You are a helpful personal assistant. '
            + ('Use the web search results to answer accurately. Cite sources [1],[2]. '
               if use_search else 'Answer from your knowledge. Say if you are unsure.')
        )},
        {'role': 'user', 'content': f'{context}Question: {question}'}
    ]
    resp = requests.post(OLLAMA_URL, json={
        'model': 'llama3', 'messages': messages, 'max_tokens': 512
    }, timeout=120)  # local generation can be slow, but must not hang forever
    resp.raise_for_status()
    return {
        'answer': resp.json()['choices'][0]['message']['content'],
        'searched': use_search,
        'cost': 0.005 if use_search else 0,
    }
result = ask_assistant('What is the latest Python version in 2026?')
print(f'[{"SEARCHED" if result["searched"] else "LOCAL"}] ${result["cost"]}')
print(result['answer'])

Step 2: Add platform-specific search commands
Let the assistant search specific platforms. Prefix questions with platform names to target Reddit for opinions, YouTube for tutorials, or Amazon for products.
def search_platform(query: str, platform: str = 'google') -> str:
    """Search a specific platform by adding a site: filter to the query.

    Unknown platform names fall back to a plain (unfiltered) web search.
    """
    site_filters = {
        'reddit': 'site:reddit.com',
        'youtube': 'site:youtube.com',
        'amazon': 'site:amazon.com',
        'walmart': 'site:walmart.com',
    }
    site = site_filters.get(platform, '')
    return search_web(f'{site} {query}'.strip())
def detect_platform(question: str) -> tuple[str, str]:
    """Detect if a platform is mentioned and return (platform, cleaned_question).

    Matching AND removal are case-insensitive: the original code detected
    phrases on the lowercased question but removed them with case-sensitive
    str.replace, so "on Reddit" was detected yet never stripped from the
    cleaned question. re.sub with IGNORECASE fixes that while preserving
    the original casing of the rest of the question.
    """
    platform_markers = {
        'reddit': ('on reddit', 'reddit thinks'),
        'youtube': ('on youtube', 'youtube tutorial'),
        'amazon': ('on amazon', 'amazon price'),
    }
    lowered = question.lower()
    for platform, phrases in platform_markers.items():
        if any(p in lowered for p in phrases):
            cleaned = question
            for phrase in phrases:
                cleaned = re.sub(re.escape(phrase), '', cleaned, flags=re.IGNORECASE)
            return platform, cleaned.strip()
    return 'google', question
def smart_assistant(question: str) -> dict:
    """Answer a question, routing any search to the detected platform.

    Returns:
        dict with 'answer', 'platform' (search target or 'google'),
        'searched' (bool), and 'cost' (USD spend for this call).
    """
    platform, clean_q = detect_platform(question)
    # Platform mentions always trigger a search, even without time triggers.
    use_search = needs_search(clean_q) or platform != 'google'
    context = ''
    if use_search:
        results = search_platform(clean_q, platform)
        context = f'Search results from {platform.upper()}:\n{results}\n\n'
    messages = [
        {'role': 'system', 'content': f'You are a helpful assistant. '
            f'{"Answer from " + platform + " search results. Cite [1],[2]." if use_search else "Answer from knowledge."}'},
        {'role': 'user', 'content': f'{context}Question: {question}'}
    ]
    resp = requests.post(
        OLLAMA_URL,
        json={'model': 'llama3', 'messages': messages, 'max_tokens': 512},
        timeout=120,  # bound the wait on local generation
    )
    resp.raise_for_status()
    return {'answer': resp.json()['choices'][0]['message']['content'],
            'platform': platform, 'searched': use_search, 'cost': 0.005 if use_search else 0}
for q in ['What does reddit think about mechanical keyboards?',
'What is a Python list comprehension?',
'Best noise cancelling headphones on amazon']:
r = smart_assistant(q)
print(f'[{r["platform"]:8s}] ${r["cost"]} - {q[:50]}')
print(f' {r["answer"][:80]}...')
    print()

Step 3: Build an interactive chat loop
Create a terminal-based chat interface that maintains conversation history and searches when needed.
def chat_loop():
"""Interactive chat with search-augmented Ollama."""
history = []
total_cost = 0
print('Ollama Assistant with Web Search')
print('Type "quit" to exit, "cost" for session cost')
print('-' * 40)
while True:
question = input('\nYou: ').strip()
if not question:
continue
if question.lower() == 'quit':
print(f'\nSession cost: ${total_cost:.3f}')
break
if question.lower() == 'cost':
print(f'Session cost so far: ${total_cost:.3f}')
continue
result = smart_assistant(question)
total_cost += result['cost']
source = f'{result["platform"]} search' if result['searched'] else 'local knowledge'
print(f'\nAssistant [{source}, ${result["cost"]}]:')
print(result['answer'])
history.append({'question': question, **result})
# Run the chat loop (uncomment to use interactively)
# chat_loop()
# Simulate for the tutorial
for q in ['What is the latest Node.js version?', 'Explain Python decorators']:
r = smart_assistant(q)
source = f'{r["platform"]} search' if r['searched'] else 'local'
print(f'You: {q}')
print(f'Assistant [{source}]: {r["answer"][:100]}...')
    print()

Step 4: Add conversation memory and context
Track conversation history so the assistant remembers previous answers and can reference earlier searches.
class SearchAssistant:
    """Conversational assistant that remembers history and searches the web.

    Keeps the last few exchanges in the prompt so follow-up questions
    ("tell me more about its features") have context, and tracks the
    per-session search count and spend.
    """

    def __init__(self, model: str = 'llama3'):
        self.model = model
        self.history = []      # list of {'question': ..., 'answer': ...}
        self.total_cost = 0    # cumulative search spend (USD)
        self.search_count = 0  # number of paid searches this session

    def ask(self, question: str) -> str:
        """Answer a question, searching the web when it needs fresh data.

        Raises:
            requests.HTTPError: If the Ollama server returns a non-2xx status.
        """
        platform, clean_q = detect_platform(question)
        use_search = needs_search(clean_q) or platform != 'google'
        context = ''
        if use_search:
            context = f'\nWeb results ({platform}):\n{search_platform(clean_q, platform)}\n'
            self.search_count += 1
            self.total_cost += 0.005
        # Build messages with history
        messages = [{'role': 'system', 'content': (
            'You are a helpful personal assistant with web search access. '
            'Cite search results as [1],[2]. Remember previous conversation.'
        )}]
        # Add recent history (last 4 exchanges)
        for h in self.history[-4:]:
            messages.append({'role': 'user', 'content': h['question']})
            messages.append({'role': 'assistant', 'content': h['answer']})
        messages.append({'role': 'user', 'content': f'{context}\n{question}'})
        resp = requests.post(OLLAMA_URL, json={
            'model': self.model, 'messages': messages, 'max_tokens': 512
        }, timeout=120)  # bound the wait on local generation; do not hang forever
        resp.raise_for_status()
        answer = resp.json()['choices'][0]['message']['content']
        self.history.append({'question': question, 'answer': answer})
        return answer

    def stats(self):
        """Print session usage: question count, searches, and total spend."""
        print(f'Questions: {len(self.history)}')
        print(f'Searches: {self.search_count}')
        print(f'Total cost: ${self.total_cost:.3f}')
assistant = SearchAssistant()
print(assistant.ask('What is the latest Python version?'))
print(assistant.ask('Tell me more about its new features'))
assistant.stats()

Python Example
import os, requests
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
def search(query, num=5):
    """Return Scavio organic results as '[n] title: snippet' lines.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': num},
        timeout=30,  # never hang on a slow search backend
    )
    resp.raise_for_status()
    return '\n'.join(f'[{i+1}] {r["title"]}: {r.get("snippet","")}'
                     for i, r in enumerate(resp.json().get('organic_results', [])))
def ask(question):
    """Answer with Ollama; prepend web results for time-sensitive questions."""
    triggers = ['latest', 'current', '2026', 'price', 'best', 'news', 'vs']
    use_search = any(t in question.lower() for t in triggers)
    ctx = f'Search results:\n{search(question)}\n\n' if use_search else ''
    resp = requests.post('http://localhost:11434/v1/chat/completions', json={
        'model': 'llama3', 'messages': [
            {'role': 'system', 'content': 'Helpful assistant. Cite search results [1],[2] when available.'},
            {'role': 'user', 'content': f'{ctx}Q: {question}'}], 'max_tokens': 512},
        timeout=120)  # local generation is slow but must stay bounded
    resp.raise_for_status()
    return resp.json()['choices'][0]['message']['content']
print(ask('What is the latest Python version in 2026?'))

JavaScript Example
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;

// POST the query to Scavio and flatten organic results into "[n] title: snippet" lines.
async function search(query, num = 5) {
  const payload = { query, country_code: 'us', num_results: num };
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify(payload)
  });
  const data = await resp.json();
  const results = data.organic_results || [];
  const lines = results.map((r, i) => `[${i + 1}] ${r.title}: ${r.snippet || ''}`);
  return lines.join('\n');
}
// Answer via local Ollama; prepend web results when the question looks time-sensitive.
async function ask(question) {
  const timeSensitive = /latest|current|2026|price|best|news|vs/i;
  let ctx = '';
  if (timeSensitive.test(question)) {
    const results = await search(question);
    ctx = `Search results:\n${results}\n\n`;
  }
  const payload = {
    model: 'llama3',
    messages: [
      { role: 'system', content: 'Helpful assistant. Cite [1],[2] from search results.' },
      { role: 'user', content: `${ctx}Q: ${question}` }
    ],
    max_tokens: 512
  };
  const resp = await fetch('http://localhost:11434/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload)
  });
  const data = await resp.json();
  return data.choices[0].message.content;
}
ask('latest Python version 2026').then(console.log);

Expected Output
[SEARCHED] $0.005
Based on the search results, the latest Python version as of 2026 is
Python 3.15, released in October 2025 [1]. The 3.15 release includes
performance improvements and new syntax features [2].
[reddit ] $0.005 - What does reddit think about mechanical keyboards?
According to Reddit discussions, r/MechanicalKeyboards recomm...
[google ] $0.000 - What is a Python list comprehension?
A list comprehension is a concise way to create lists in Pyth...
Questions: 2
Searches: 1
Total cost: $0.005