在本地运行的个人知识库可以保护数据隐私,并且能够离线工作。但纯本地的系统只能依据过时的数据作答。加入搜索层后,您的本地 LLM(大语言模型)可以在需要最新信息时查询网络,同时让其余所有内容保持私密。本教程将构建一个个人知识库:在本地存储笔记,优先从笔记中获取答案,并在本地知识不足时回退到 Scavio 搜索(0.005 美元/次查询)。
前置条件
- Ollama 已在本地运行,并已下载模型(llama3 或 Mistral)
- 已安装 Python 3.9+
- 安装了 requests 和 chromadb 库
- 来自 scavio.dev 的 Scavio API 密钥
操作指南
步骤 1: 为个人笔记设置本地矢量存储
使用 ChromaDB 在本地存储和搜索您的个人笔记。这是保持私密性的主要知识来源。
import chromadb
import os
# Create a persistent local vector store. Its data directory is
# ./personal_kb, resolved relative to the current working directory.
client = chromadb.PersistentClient(path='./personal_kb')
# One collection named 'knowledge'; add_note() writes to it and
# search_local() queries it through this module-level handle.
collection = client.get_or_create_collection('knowledge')
def add_note(title: str, content: str, tags: list = None):
    """Add a note to the personal knowledge base, or overwrite an existing one.

    The note is upserted into the module-level ``collection`` under an ID
    derived from the title, so adding a note with the same title again
    replaces the earlier version.

    Args:
        title: Note title. It is stored in the metadata, prepended to the
            indexed text, and used to derive the document ID.
        content: Note body.
        tags: Optional list of tag strings, stored comma-joined in the
            metadata. ``None`` means "no tags".
    """
    # Imported locally because the surrounding script does not import hashlib.
    import hashlib

    slug = title.lower().replace(' ', '-')
    if len(slug) <= 50:
        doc_id = slug
    else:
        # Plain truncation would give every title sharing the same first 50
        # characters the same ID, and the upsert below would then replace one
        # note with another without any warning. A short digest of the full
        # slug keeps the ID at 50 characters while tying it to the whole title.
        digest = hashlib.sha256(slug.encode('utf-8')).hexdigest()[:8]
        doc_id = f'{slug[:41]}-{digest}'

    metadata = {'title': title, 'tags': ','.join(tags or [])}
    collection.upsert(
        documents=[f'{title}\n{content}'],
        metadatas=[metadata],
        ids=[doc_id],
    )
    print(f'Added: {title}')
def search_local(query: str, n: int = 3) -> list:
    """Query the local collection and return the closest notes.

    Args:
        query: Free-text query.
        n: Number of results to request from the collection.

    Returns:
        A list of dicts with the keys ``content``, ``title``, ``distance``
        and ``source`` (always ``'local'``), in the order the collection
        returned them.
    """
    hits = collection.query(query_texts=[query], n_results=n)
    # Only one query text is sent, so index 0 selects its result lists.
    return [
        {
            'content': text,
            'title': hits['metadatas'][0][pos].get('title', ''),
            'distance': hits['distances'][0][pos],
            'source': 'local',
        }
        for pos, text in enumerate(hits['documents'][0])
    ]
# Add some personal notes. add_note() upserts under an ID derived from the
# title, so re-running this script should overwrite these notes rather than
# add duplicates.
add_note('Python project structure', 'I prefer src/ layout with pyproject.toml. Tests go in tests/ at root level.', ['python', 'setup'])
add_note('API design preferences', 'Always use POST for search endpoints. Return JSON with consistent error format.', ['api', 'design'])
add_note('Deployment checklist', 'Railway for APIs, Vercel for frontends, Cloudflare for workers.', ['deploy'])
# Query the store with a natural-language question; the loop that follows
# prints each hit's distance next to its title.
results = search_local('how do I structure python projects')
print(f'\nFound {len(results)} local results')
for r in results:
print(f' [{r["distance"]:.3f}] {r["title"]}')
步骤 2: 添加网络搜索后备
当本地知识不足时(距离分数高或没有结果),请通过 Scavio 进行网络搜索。
import requests
# Scavio API key, sent as the x-api-key header by search_web(). Indexing
# os.environ raises KeyError as soon as this line runs if the variable
# SCAVIO_API_KEY is not set.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
def search_web(query: str, num: int = 5, timeout: float = 30.0) -> list:
    """Search the web through the Scavio API.

    Args:
        query: Search query text.
        num: Number of results to request.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A list of dicts with the keys ``content`` (title and snippet joined
        by a newline), ``title``, ``url`` and ``source`` (always ``'web'``).
        The list is empty when the response has no ``organic_results``.

    Raises:
        requests.RequestException: If the request times out, cannot connect,
            or the API answers with an HTTP error status.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': num},
        # requests applies no timeout by default; without one a stalled
        # connection would block the caller indefinitely.
        timeout=timeout,
    )
    # Surface auth/quota/server errors instead of parsing the error body as
    # if it were a response that simply contained no results.
    resp.raise_for_status()

    hits = []
    for item in resp.json().get('organic_results', []):
        # 'snippet' was already read as optional; read 'title' and 'link' the
        # same way so one incomplete entry cannot fail the whole search.
        title = item.get('title', '')
        hits.append({
            'content': f"{title}\n{item.get('snippet', '')}",
            'title': title,
            'url': item.get('link', ''),
            'source': 'web',
        })
    return hits
def smart_search(query: str, local_threshold: float = 1.0) -> list:
    """Return local notes when the best one is close enough, else add web hits.

    Args:
        query: Free-text query.
        local_threshold: The first local hit must have a distance strictly
            below this value for the local results to be used on their own.

    Returns:
        The local results alone when they pass the threshold check;
        otherwise the local results followed by the web results.
    """
    notes = search_local(query)
    has_close_match = bool(notes) and notes[0]['distance'] < local_threshold

    if not has_close_match:
        # Nothing local, or the best local hit is too far away: go to the web
        # and append its results after whatever local hits there were.
        print(f'[WEB] Local KB insufficient, searching web ($0.005)')
        return notes + search_web(query)

    print(f'[LOCAL] Found {len(notes)} relevant notes')
    return notes
# Test: a topic the seeded notes cover, so smart_search should stay local
print('Query: python project structure')
results = smart_search('python project structure')
print()
# Test: a topic the notes do not cover, so the web fallback is expected
print('Query: latest FastAPI release 2026')
results = smart_search('latest FastAPI release 2026')
步骤 3: 连接到本地 LLM
将检索到的上下文发送给 Ollama 生成回答。LLM 会优先使用可用的本地笔记,并在需要时使用网络搜索结果。
# Chat-completions endpoint on localhost:11434 that ask_kb() posts to.
# NOTE(review): presumably Ollama's OpenAI-compatible API on its default
# port; confirm against the Ollama documentation.
LLM_URL = 'http://localhost:11434/v1/chat/completions'
def ask_kb(question: str, model: str = 'llama3') -> dict:
    """Answer a question from retrieved notes, plus web results if needed.

    Retrieves context with ``smart_search``, numbers each item and labels it
    as a personal note or a web result, then asks the chat model at
    ``LLM_URL`` to answer with citations.

    Args:
        question: The user's question.
        model: Name of the model to request from the LLM server.

    Returns:
        A dict with ``answer`` (the model's reply text), ``sources`` (the
        ``source`` of every retrieved item, in context order) and ``cost``
        (0.005 when any web result was retrieved, otherwise 0.0).

    Raises:
        requests.HTTPError: If the LLM server answers with an error status.
    """
    results = smart_search(question)

    # Build the numbered context; the numbers are what the model cites.
    context_parts = []
    for i, r in enumerate(results, 1):
        label = 'Personal Note' if r['source'] == 'local' else 'Web'
        context_parts.append(f'[{i}] ({label}) {r["content"]}')
    context = '\n\n'.join(context_parts)

    messages = [
        {'role': 'system', 'content': (
            'You are a personal assistant with access to the user\'s notes and web search. '
            'Prefer personal notes when relevant. Cite sources as [1], [2], etc. '
            'Mark whether each source is from personal notes or web search.'
        )},
        {'role': 'user', 'content': f'Context:\n{context}\n\nQuestion: {question}'}
    ]
    resp = requests.post(LLM_URL, json={
        'model': model, 'messages': messages, 'max_tokens': 512
    })
    # Without this, an error response (for example an unknown model name)
    # would only show up as a confusing KeyError on 'choices' below.
    resp.raise_for_status()
    answer = resp.json()['choices'][0]['message']['content']

    used_web = any(r['source'] == 'web' for r in results)
    return {
        'answer': answer,
        'sources': [r['source'] for r in results],
        # Always a float, so the value formats the same way on both branches.
        'cost': 0.005 if used_web else 0.0,
    }
result = ask_kb('How should I structure a new Python project?')
print(f'A: {result["answer"]}')
print(f'Sources: {result["sources"]}, Cost: ${result["cost"]}')
步骤 4: 从网络搜索结果中自动学习
当知识库回退到网络搜索时,将有用的结果保存为新笔记。随着时间的推移,知识库需要的网络搜索次数会减少。
def ask_and_learn(question: str) -> dict:
    """Ask a question and save the top web results to the local KB.

    Args:
        question: The user's question.

    Returns:
        The dict returned by ``ask_kb``, unchanged.
    """
    result = ask_kb(question)

    # Decide from the reported sources rather than comparing the float cost
    # against a repeated magic constant.
    if 'web' not in result['sources']:
        return result

    # ask_kb() returns only the source labels, not the retrieved documents,
    # so the web results have to be fetched again. Calling search_web()
    # directly skips a second local vector query and a duplicate log line.
    # NOTE(review): this is still a second web query that result['cost']
    # does not account for; returning the retrieved documents from ask_kb()
    # would remove it.
    web_results = search_web(question)
    saved = web_results[:2]  # Save top 2 web results
    for r in saved:
        add_note(
            # Fall back to the question when the result has no usable title.
            title=f'[Auto] {(r.get("title") or question)[:50]}',
            content=r['content'],
            tags=['auto-learned', 'web-search'],
        )
    print(f'Saved {len(saved)} web results to local KB')
    return result
# First time: will search web. ask_and_learn() then stores up to two of the
# web results as '[Auto] ...' notes.
print('--- First query (web search) ---')
result = ask_and_learn('What is the latest FastAPI version?')
print(f'Cost: ${result["cost"]}')
print()
# Second time: should find local result. This only holds if the auto-saved
# notes score a distance below smart_search's threshold (1.0 by default).
print('--- Same query again (should be local) ---')
result = ask_and_learn('What is the latest FastAPI version?')
print(f'Cost: ${result["cost"]}')
print()
print(f'Total notes in KB: {collection.count()}')
Python 示例
import os, requests, chromadb
# Scavio API key used by search(); raises KeyError here if the environment
# variable SCAVIO_API_KEY is not set.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Chroma store under ./kb with a single 'notes' collection, shared by add()
# and search() through the module-level name kb.
client = chromadb.PersistentClient(path='./kb')
kb = client.get_or_create_collection('notes')
def add(title, content):
    """Upsert a note into ``kb`` under an ID derived from its title."""
    # ID: first 50 characters of the title, with spaces turned into hyphens.
    note_id = title[:50].replace(' ', '-')
    body = f'{title}\n{content}'
    kb.upsert(documents=[body], ids=[note_id])
def search(query):
    """Answer from local notes when the best match is close, else from the web.

    Args:
        query: Free-text query.

    Returns:
        A list of ``{'text': ..., 'source': ...}`` dicts. ``source`` is
        ``'local'`` when the closest note has a distance below 0.8, and
        ``'web'`` when the Scavio API was used instead.

    Raises:
        requests.RequestException: If the web request times out, cannot
            connect, or the API answers with an HTTP error status.
    """
    local = kb.query(query_texts=[query], n_results=3)
    distances = local['distances'][0]
    if distances and distances[0] < 0.8:
        return [{'text': d, 'source': 'local'} for d in local['documents'][0]]

    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 3},
        # requests applies no timeout by default; bound the wait so a stalled
        # connection cannot hang the caller.
        timeout=30,
    )
    # Report HTTP failures instead of treating the error body as "no results".
    resp.raise_for_status()
    return [{'text': r.get('snippet', ''), 'source': 'web'}
            for r in resp.json().get('organic_results', [])]
add('My stack', 'Python + FastAPI + Railway + Vercel')
for r in search('what is my tech stack'):
print(f'[{r["source"]}] {r["text"][:60]}')
JavaScript 示例
// Scavio API key sent as the x-api-key header by search(); this is
// undefined when the SCAVIO_API_KEY environment variable is not set.
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
// Simple in-memory KB (use a vector DB in production).
// addNote() appends to this array and searchLocal() scans it.
const kb = [];
function addNote(title, content) {
  // Keep a lower-cased "title content" string next to the note so that
  // searchLocal() can match words case-insensitively.
  const text = `${title} ${content}`.toLowerCase();
  kb.push({ title, content, text });
}
/**
 * Return up to three notes whose text contains at least one word of the query.
 *
 * @param {string} query Free-text query; matching is case-insensitive.
 * @returns {Array<{title: string, content: string, text: string}>} Matching
 *   notes in insertion order, at most three.
 */
function searchLocal(query) {
  // Split on runs of whitespace and drop empty tokens. An empty token would
  // match every note, because String.prototype.includes('') is always true,
  // so an empty or doubly-spaced query would otherwise return the whole KB.
  // The word list is built once here rather than once per note.
  const words = query.toLowerCase().split(/\s+/).filter(Boolean);
  return kb.filter(n => words.some(w => n.text.includes(w))).slice(0, 3);
}
/**
 * Search local notes first and fall back to the Scavio web search.
 *
 * @param {string} query Free-text query.
 * @returns {Promise<Array<{text: string, source: string}>>} Local note
 *   contents tagged 'local' when any note matches; otherwise web snippets
 *   tagged 'web'.
 * @throws {Error} When the web request returns a non-2xx HTTP status.
 */
async function search(query) {
  const local = searchLocal(query);
  if (local.length > 0) return local.map(n => ({ text: n.content, source: 'local' }));

  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: 3 })
  });
  // fetch() only rejects on network failure; an HTTP error still resolves,
  // so check the status instead of reading the error body as "no results".
  if (!resp.ok) {
    throw new Error(`Scavio search failed: HTTP ${resp.status}`);
  }
  const data = await resp.json();
  return (data.organic_results || []).map(r => ({ text: r.snippet || '', source: 'web' }));
}
addNote('My stack', 'Python FastAPI Railway Vercel');
search('what is my tech stack').then(r => r.forEach(x => console.log(`[${x.source}] ${x.text.slice(0, 60)}`)));
预期输出
Added: Python project structure
Added: API design preferences
Added: Deployment checklist
Query: python project structure
[LOCAL] Found 3 relevant notes
Query: latest FastAPI release 2026
[WEB] Local KB insufficient, searching web ($0.005)
A: Based on your personal notes, you prefer the src/ layout with
pyproject.toml for Python projects [1]. Tests should go in a tests/
directory at the root level [1].
Sources: ['local', 'local', 'local'], Cost: $0.0
Total notes in KB: 5