A personal knowledge base that runs locally keeps your data private and works offline. But local-only systems answer from stale data. Adding a search layer means your local LLM can check the web when it needs current information while keeping everything else private. This tutorial builds a personal KB that stores notes locally, answers from them first, and falls back to Scavio search ($0.005/query) when local knowledge is insufficient.
Prerequisites
- Ollama running locally with a model (llama3 or mistral)
- Python 3.9+ installed
- requests and chromadb libraries installed
- A Scavio API key from scavio.dev
Walkthrough
Step 1: Set up the local vector store for personal notes
Use ChromaDB to store and search your personal notes locally. This is the primary knowledge source that stays private.
import os
from typing import Optional

import chromadb
# Create a persistent local vector store
# Data is written under ./personal_kb, so notes survive process restarts.
client = chromadb.PersistentClient(path='./personal_kb')
# Single collection holding every note; created on first run, reopened after.
collection = client.get_or_create_collection('knowledge')
def add_note(title: str, content: str, tags: Optional[list] = None):
    """Add (or update) a note in the personal knowledge base.

    Args:
        title: Human-readable title; also used to derive the document id.
        content: Body text of the note.
        tags: Optional list of tag strings, stored comma-joined in metadata.
    """
    # Derive a stable id from the title so re-adding the same title upserts
    # in place instead of creating a duplicate. Truncated to keep ids short.
    doc_id = title.lower().replace(' ', '-')[:50]
    metadata = {'title': title, 'tags': ','.join(tags or [])}
    collection.upsert(
        documents=[f'{title}\n{content}'],
        metadatas=[metadata],
        ids=[doc_id]
    )
    print(f'Added: {title}')
def search_local(query: str, n: int = 3) -> list:
    """Query the local knowledge base and return the top matches."""
    raw = collection.query(query_texts=[query], n_results=n)
    # ChromaDB returns parallel lists (one entry per query); walk the
    # documents/metadatas/distances for our single query in lockstep.
    hits = []
    for doc, meta, dist in zip(raw['documents'][0],
                               raw['metadatas'][0],
                               raw['distances'][0]):
        hits.append({
            'content': doc,
            'title': meta.get('title', ''),
            'distance': dist,
            'source': 'local',
        })
    return hits
# Add some personal notes
# Seed the KB with a few notes; each call upserts into the local store.
add_note('Python project structure', 'I prefer src/ layout with pyproject.toml. Tests go in tests/ at root level.', ['python', 'setup'])
add_note('API design preferences', 'Always use POST for search endpoints. Return JSON with consistent error format.', ['api', 'design'])
add_note('Deployment checklist', 'Railway for APIs, Vercel for frontends, Cloudflare for workers.', ['deploy'])
# Sanity check: semantic search should surface the project-structure note.
results = search_local('how do I structure python projects')
print(f'\nFound {len(results)} local results')
for r in results:
    print(f' [{r["distance"]:.3f}] {r["title"]}')
Step 2: Add the web search fallback
When local knowledge is insufficient (high distance score or no results), fall back to web search via Scavio.
import requests
# Read the key from the environment; a missing SCAVIO_API_KEY raises
# KeyError at startup, failing fast instead of sending unauthenticated calls.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
def search_web(query: str, num: int = 5) -> list:
    """Search the web via the Scavio API and normalize the results.

    Args:
        query: Search query string.
        num: Maximum number of organic results to request.

    Returns:
        A list of dicts shaped like search_local() results (plus a 'url'
        key), with source set to 'web'.

    Raises:
        requests.HTTPError: If the API responds with an error status
            (e.g. bad key or exhausted quota).
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': num},
        timeout=30,  # bound the call so a stalled connection can't hang us
    )
    # Fail loudly on auth/quota errors instead of silently returning [].
    resp.raise_for_status()
    return [{'content': f"{r['title']}\n{r.get('snippet', '')}",
             'title': r['title'], 'url': r['link'],
             'source': 'web'} for r in resp.json().get('organic_results', [])]
def smart_search(query: str, local_threshold: float = 1.0) -> list:
    """Search the local KB first, falling back to paid web search if needed."""
    local_results = search_local(query)
    # The local answer wins when its best (lowest) distance beats the
    # threshold; otherwise we pay for one web query and append its results.
    have_good_local = bool(local_results) and local_results[0]['distance'] < local_threshold
    if have_good_local:
        print(f'[LOCAL] Found {len(local_results)} relevant notes')
        return local_results
    print(f'[WEB] Local KB insufficient, searching web ($0.005)')
    return local_results + search_web(query)
# Test: should find local result
print('Query: python project structure')
results = smart_search('python project structure')
print()
# Test: should fall back to web
print('Query: latest FastAPI release 2026')
results = smart_search('latest FastAPI release 2026')
Step 3: Connect to the local LLM
Send the retrieved context to Ollama for answering. The LLM gets local notes when available and web results when needed.
LLM_URL = 'http://localhost:11434/v1/chat/completions'
def ask_kb(question: str) -> dict:
    """Answer a question using local notes with an optional web fallback.

    Retrieves context via smart_search(), sends it to the local LLM, and
    returns the answer together with its provenance and search cost.

    Returns:
        Dict with 'answer' (str), 'sources' (list of 'local'/'web' tags,
        one per retrieved snippet), and 'cost' (0, or 0.005 USD when one
        web search was performed).
    """
    results = smart_search(question)
    # Number each snippet so the model can cite [1], [2], ... and label
    # its provenance (personal note vs. web result).
    context_parts = []
    for i, r in enumerate(results, 1):
        label = 'Personal Note' if r['source'] == 'local' else 'Web'
        context_parts.append(f'[{i}] ({label}) {r["content"]}')
    context = '\n\n'.join(context_parts)
    messages = [
        {'role': 'system', 'content': (
            'You are a personal assistant with access to the user\'s notes and web search. '
            'Prefer personal notes when relevant. Cite sources as [1], [2], etc. '
            'Mark whether each source is from personal notes or web search.'
        )},
        {'role': 'user', 'content': f'Context:\n{context}\n\nQuestion: {question}'}
    ]
    # Local generation can be slow, but still bound the request so a wedged
    # Ollama instance can't hang the script forever.
    resp = requests.post(LLM_URL, json={
        'model': 'llama3', 'messages': messages, 'max_tokens': 512
    }, timeout=120)
    answer = resp.json()['choices'][0]['message']['content']
    used_web = any(r['source'] == 'web' for r in results)
    return {
        'answer': answer,
        'sources': [r['source'] for r in results],
        'cost': 0.005 if used_web else 0,
    }
# Demo: this should be answered entirely from local notes (cost $0).
result = ask_kb('How should I structure a new Python project?')
print(f'A: {result["answer"]}')
print(f'Sources: {result["sources"]}, Cost: ${result["cost"]}')
Step 4: Add automatic learning from web searches
When the KB falls back to web search, save the useful results as new notes. Over time, the KB needs fewer web searches.
def ask_and_learn(question: str) -> dict:
    """Ask a question and cache useful web results as local notes.

    Over time this shrinks the fraction of queries that need the paid web
    fallback, because learned answers become locally searchable.
    """
    result = ask_kb(question)
    # If the answer needed web search, persist the top web hits locally.
    # Compare with > 0 rather than == 0.005: exact float equality is fragile
    # and would silently break if the per-query price ever changes.
    if result['cost'] > 0:
        # NOTE(review): smart_search() here re-runs the web query, incurring
        # a second $0.005 charge. Having ask_kb() return its raw results
        # would avoid that — flagged rather than changed to keep the
        # ask_kb() interface stable.
        web_results = [r for r in smart_search(question) if r['source'] == 'web']
        for r in web_results[:2]:  # Save top 2 web results
            add_note(
                title=f'[Auto] {r.get("title", question)[:50]}',
                content=r['content'],
                tags=['auto-learned', 'web-search']
            )
        print(f'Saved {min(2, len(web_results))} web results to local KB')
    return result
# Demo of the learning loop: the same query goes from paid to free.
# First time: will search web
print('--- First query (web search) ---')
result = ask_and_learn('What is the latest FastAPI version?')
print(f'Cost: ${result["cost"]}')
print()
# Second time: should find local result
print('--- Same query again (should be local) ---')
result = ask_and_learn('What is the latest FastAPI version?')
print(f'Cost: ${result["cost"]}')
print()
print(f'Total notes in KB: {collection.count()}')
Python Example
import os, requests, chromadb
# Compact, self-contained version of the tutorial above.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
client = chromadb.PersistentClient(path='./kb')
kb = client.get_or_create_collection('notes')
def add(title, content):
    """Upsert a note; the truncated, dash-joined title doubles as its id."""
    note_id = title[:50].replace(' ', '-')
    kb.upsert(documents=[f'{title}\n{content}'], ids=[note_id])
def search(query):
    """Return local matches when close enough, otherwise fall back to the web."""
    local = kb.query(query_texts=[query], n_results=3)
    # A best distance under 0.8 means the top local note is relevant enough.
    if local['distances'][0] and local['distances'][0][0] < 0.8:
        return [{'text': d, 'source': 'local'} for d in local['documents'][0]]
    resp = requests.post('https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 3},
        timeout=30)  # bound the request so a stalled API can't hang the script
    return [{'text': r.get('snippet', ''), 'source': 'web'}
            for r in resp.json().get('organic_results', [])]
add('My stack', 'Python + FastAPI + Railway + Vercel')
for r in search('what is my tech stack'):
    print(f'[{r["source"]}] {r["text"][:60]}')
JavaScript Example
// Scavio API key; set SCAVIO_API_KEY in the environment before running.
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
// Simple in-memory KB (use a vector DB in production)
const kb = [];
function addNote(title, content) {
  // Precompute a lowercase haystack so searches can do cheap substring tests.
  const text = `${title} ${content}`.toLowerCase();
  kb.push({ title, content, text });
}
function searchLocal(query) {
  // A note matches when any word of the query appears in its lowercased text.
  const words = query.toLowerCase().split(' ');
  const matches = kb.filter((note) => words.some((w) => note.text.includes(w)));
  return matches.slice(0, 3);
}
async function search(query) {
  // Local notes win outright; only hit the paid API when nothing matches.
  const localHits = searchLocal(query);
  if (localHits.length > 0) {
    return localHits.map((n) => ({ text: n.content, source: 'local' }));
  }
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: 3 })
  });
  const data = await resp.json();
  return (data.organic_results || []).map((r) => ({ text: r.snippet || '', source: 'web' }));
}
addNote('My stack', 'Python FastAPI Railway Vercel');
search('what is my tech stack').then(r => r.forEach(x => console.log(`[${x.source}] ${x.text.slice(0, 60)}`)));
Expected Output
Added: Python project structure
Added: API design preferences
Added: Deployment checklist
Query: python project structure
[LOCAL] Found 3 relevant notes
Query: latest FastAPI release 2026
[WEB] Local KB insufficient, searching web ($0.005)
A: Based on your personal notes, you prefer the src/ layout with
pyproject.toml for Python projects [1]. Tests should go in a tests/
directory at the root level [1].
Sources: ['local', 'local', 'local'], Cost: $0
Total notes in KB: 5