Une base de connaissances personnelle qui fonctionne localement garde vos données privées et fonctionne hors ligne. Mais les systèmes uniquement locaux répondent avec des données obsolètes. Ajouter une couche de recherche permet à votre LLM local de consulter le web lorsqu'il a besoin d'informations actuelles tout en gardant le reste privé. Ce tutoriel construit une base de connaissances personnelle qui stocke les notes localement, répond d'abord à partir de celles-ci, et utilise Scavio comme recherche de secours (0,005 $/requête) lorsque les connaissances locales sont insuffisantes.
Prérequis
- Ollama fonctionnant localement avec un modèle (llama3 ou mistral)
- Python 3.9+ installé
- bibliothèques requests et chromadb installées
- Une clé API Scavio de scavio.dev
Parcours
Étape 1: Configurer le stockage vectoriel local pour les notes personnelles
Utilisez ChromaDB pour stocker et rechercher vos notes personnelles localement. C'est la source de connaissance principale qui reste privée.
import chromadb
import os
# Create a persistent local vector store under ./personal_kb.
# `client` and `collection` are module-level handles; the helper functions
# in this file (add_note, search_local) read and write through `collection`.
client = chromadb.PersistentClient(path='./personal_kb')
# Fetch the 'knowledge' collection, creating it when it does not exist yet.
collection = client.get_or_create_collection('knowledge')
def add_note(title: str, content: str, tags: list = None):
    """Upsert a note into the personal knowledge base.

    The note id is the lower-cased title with spaces replaced by hyphens,
    cut to 50 characters, so titles that share their first 50 characters
    map to the same id. Tags are stored as a single comma-joined metadata
    string (empty when no tags are given).
    """
    slug = title.lower().replace(' ', '-')
    note_id = slug[:50]
    tag_string = ','.join(tags) if tags else ''
    collection.upsert(
        ids=[note_id],
        documents=[f'{title}\n{content}'],
        metadatas=[{'title': title, 'tags': tag_string}],
    )
    print(f'Added: {title}')
def search_local(query: str, n: int = 3) -> list:
    """Query the local collection and return its hits as plain dicts.

    Each hit carries 'content', 'title', 'distance' and 'source' keys
    ('source' is always 'local'), in the order the collection returned them.
    """
    response = collection.query(query_texts=[query], n_results=n)
    # One query text was sent, so only row 0 of each result field is read.
    return [
        {
            'content': text,
            'title': response['metadatas'][0][pos].get('title', ''),
            'distance': response['distances'][0][pos],
            'source': 'local',
        }
        for pos, text in enumerate(response['documents'][0])
    ]
# Seed the knowledge base with three example notes (title, content, tags).
add_note('Python project structure', 'I prefer src/ layout with pyproject.toml. Tests go in tests/ at root level.', ['python', 'setup'])
add_note('API design preferences', 'Always use POST for search endpoints. Return JSON with consistent error format.', ['api', 'design'])
add_note('Deployment checklist', 'Railway for APIs, Vercel for frontends, Cloudflare for workers.', ['deploy'])
# Query the local store, then list each hit's distance next to its title.
results = search_local('how do I structure python projects')
print(f'\nFound {len(results)} local results')
for r in results:
print(f' [{r["distance"]:.3f}] {r["title"]}')
Étape 2: Ajouter la recherche web de secours
Lorsque les connaissances locales sont insuffisantes (score de distance élevé ou aucun résultat), utilisez la recherche web via Scavio comme solution de repli.
import requests
# Scavio API key, read once when this code runs; raises KeyError if the
# SCAVIO_API_KEY environment variable is not set.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']


def search_web(query: str, num: int = 5, timeout: float = 10.0) -> list:
    """Search the web through the Scavio API and return normalised hits.

    Args:
        query: Search text sent to the API.
        num: Number of results requested.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        A list of dicts with 'content', 'title', 'url' and 'source'
        (always 'web') keys, one per entry of the response's
        'organic_results' list; empty when that key is absent.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': num},
        # Without a timeout, requests can block forever on a stalled connection.
        timeout=timeout,
    )
    # Surface 4xx/5xx replies instead of silently turning them into [].
    resp.raise_for_status()
    hits = []
    for item in resp.json().get('organic_results', []):
        # Tolerate entries lacking a title or link rather than raising KeyError.
        title = item.get('title', '')
        hits.append({
            'content': f"{title}\n{item.get('snippet', '')}",
            'title': title,
            'url': item.get('link', ''),
            'source': 'web',
        })
    return hits
def smart_search(query: str, local_threshold: float = 1.0) -> list:
    """Search the local KB first and fall back to the web when needed.

    Args:
        query: Text to search for.
        local_threshold: The first local hit must have a distance strictly
            below this value for the local results to be used on their own.

    Returns:
        The local hits alone when the first one is close enough; otherwise
        the local hits followed by the web hits. If the web request fails,
        only the local hits are returned.
    """
    local_results = search_local(query)
    # Only the first hit's distance is checked (assumes hits arrive
    # best-first -- confirm against the vector store's ordering).
    if local_results and local_results[0]['distance'] < local_threshold:
        print(f'[LOCAL] Found {len(local_results)} relevant notes')
        return local_results
    # Fall back to web search
    print(f'[WEB] Local KB insufficient, searching web ($0.005)')
    try:
        web_results = search_web(query)
    except requests.RequestException as exc:
        # Offline or API failure: degrade to whatever the local KB holds
        # instead of aborting the whole lookup.
        print(f'[WEB] Search failed ({exc}); using local results only')
        return local_results
    return local_results + web_results
# Test: should find local result (a matching note was seeded earlier, so
# its distance is expected to fall under the threshold).
print('Query: python project structure')
results = smart_search('python project structure')
print()
# Test: should fall back to web (no seeded note covers this topic).
print('Query: latest FastAPI release 2026')
results = smart_search('latest FastAPI release 2026')
Étape 3: Connecter au LLM local
Envoyez le contexte récupéré à Ollama pour répondre. Le LLM reçoit les notes locales lorsqu'elles sont disponibles et les résultats web lorsque nécessaire.
# Chat-completions endpoint of the local LLM server (this tutorial runs Ollama).
LLM_URL = 'http://localhost:11434/v1/chat/completions'


def ask_kb(question: str, model: str = 'llama3') -> dict:
    """Answer a question with the local LLM, using retrieved hits as context.

    Args:
        question: The user's question; also used as the retrieval query.
        model: Name of the model the LLM server should run.

    Returns:
        A dict with 'answer' (the model's reply), 'sources' (the 'source'
        value of every retrieved hit, in order) and 'cost' (0.005 when any
        web hit was retrieved, otherwise 0.0).

    Raises:
        requests.HTTPError: If the LLM server answers with an error status.
    """
    results = smart_search(question)
    # Number every hit and label its origin so the model can cite it.
    context_parts = []
    for i, r in enumerate(results, 1):
        label = 'Personal Note' if r['source'] == 'local' else 'Web'
        context_parts.append(f'[{i}] ({label}) {r["content"]}')
    context = '\n\n'.join(context_parts)
    messages = [
        {'role': 'system', 'content': (
            'You are a personal assistant with access to the user\'s notes and web search. '
            'Prefer personal notes when relevant. Cite sources as [1], [2], etc. '
            'Mark whether each source is from personal notes or web search.'
        )},
        {'role': 'user', 'content': f'Context:\n{context}\n\nQuestion: {question}'}
    ]
    resp = requests.post(LLM_URL, json={
        'model': model, 'messages': messages, 'max_tokens': 512
    })
    # Report an HTTP error directly rather than a KeyError on a reply
    # that has no 'choices' field.
    resp.raise_for_status()
    answer = resp.json()['choices'][0]['message']['content']
    used_web = any(r['source'] == 'web' for r in results)
    return {
        'answer': answer,
        'sources': [r['source'] for r in results],
        # Keep cost a float in both branches so it always prints the same way.
        'cost': 0.005 if used_web else 0.0,
    }
# Demo: ask one question and show the model's answer.
result = ask_kb('How should I structure a new Python project?')
print(f'A: {result["answer"]}')
print(f'Sources: {result["sources"]}, Cost: ${result["cost"]}')
Étape 4: Ajouter l'apprentissage automatique à partir des recherches web
Lorsque la base de connaissances utilise la recherche web de secours, enregistrez les résultats utiles comme de nouvelles notes. Avec le temps, la base de connaissances aura besoin de moins de recherches web.
def ask_and_learn(question: str) -> dict:
    """Answer a question and store the top web hits as new local notes.

    Args:
        question: The question to answer.

    Returns:
        The dict returned by ask_kb, unchanged.
    """
    result = ask_kb(question)
    # A non-zero cost is how ask_kb reports that web results were used.
    if result['cost'] > 0:
        # ask_kb does not return the hits themselves, so the web results are
        # fetched again. Calling search_web directly skips the repeated
        # local query and log line. NOTE: this is a second billed request
        # that result['cost'] does not account for.
        try:
            web_results = search_web(question)
        except requests.RequestException as exc:
            # The answer is already available; only the learning step is skipped.
            print(f'Could not save web results: {exc}')
            return result
        saved = web_results[:2]  # Save top 2 web results
        for r in saved:
            add_note(
                title=f'[Auto] {r.get("title", question)[:50]}',
                content=r['content'],
                tags=['auto-learned', 'web-search'],
            )
        print(f'Saved {len(saved)} web results to local KB')
    return result
# First time: will search web (no stored note is expected to match yet),
# and the top web hits get saved as notes.
print('--- First query (web search) ---')
result = ask_and_learn('What is the latest FastAPI version?')
print(f'Cost: ${result["cost"]}')
print()
# Second time: should find local result, provided the notes saved by the
# first call score under the distance threshold for this query.
print('--- Same query again (should be local) ---')
result = ask_and_learn('What is the latest FastAPI version?')
print(f'Cost: ${result["cost"]}')
print()
print(f'Total notes in KB: {collection.count()}')
Exemple Python
import os, requests, chromadb
# Scavio API key from the environment; raises KeyError if SCAVIO_API_KEY is unset.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Persistent local store under ./kb; `kb` is the 'notes' collection that
# add() writes to and search() queries.
client = chromadb.PersistentClient(path='./kb')
kb = client.get_or_create_collection('notes')
def add(title, content):
    """Upsert a note whose id is the first 50 characters of the title, spaces hyphenated."""
    note_id = title[:50].replace(' ', '-')
    body = f'{title}\n{content}'
    kb.upsert(documents=[body], ids=[note_id])
def search(query):
    """Return local hits when the first one is close enough, else web hits.

    Local hits are used when the collection returns at least one result and
    the first distance is below 0.8; otherwise the Scavio API is queried.
    Every hit is a dict with 'text' and 'source' ('local' or 'web') keys.

    Raises:
        requests.RequestException: If the web request fails, times out, or
            returns an error status.
    """
    local = kb.query(query_texts=[query], n_results=3)
    distances = local['distances'][0]
    if distances and distances[0] < 0.8:
        return [{'text': doc, 'source': 'local'} for doc in local['documents'][0]]
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 3},
        # Without a timeout, requests can block forever on a stalled connection.
        timeout=10,
    )
    # Surface 4xx/5xx replies instead of silently returning an empty list.
    resp.raise_for_status()
    return [{'text': hit.get('snippet', ''), 'source': 'web'}
            for hit in resp.json().get('organic_results', [])]
# Demo: store one note, then print the source and first 60 characters of each hit.
add('My stack', 'Python + FastAPI + Railway + Vercel')
for r in search('what is my tech stack'):
print(f'[{r["source"]}] {r["text"][:60]}')
Exemple JavaScript
// Scavio API key taken from the SCAVIO_API_KEY environment variable.
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
// Simple in-memory KB (use a vector DB in production).
// Entries are { title, content, text }, where text is the lowercased
// "title content" string that searchLocal matches against.
const kb = [];
// Append a note to the in-memory KB, precomputing the lowercased
// "title content" string used for keyword matching.
function addNote(title, content) {
  const text = `${title} ${content}`.toLowerCase();
  kb.push({ title, content, text });
}
// Return up to three notes whose text contains at least one word of the query.
function searchLocal(query) {
  // Split once, outside the filter, and drop empty tokens: an empty string is
  // a substring of every text, so '' (from an empty query or doubled spaces)
  // would otherwise match every note.
  const words = query.toLowerCase().split(/\s+/).filter(Boolean);
  return kb.filter(n => words.some(w => n.text.includes(w))).slice(0, 3);
}
// Answer from the in-memory KB when any note matches; otherwise query the
// Scavio API. Resolves to an array of { text, source } objects, where source
// is 'local' or 'web'. Rejects if the API replies with an error status.
async function search(query) {
  const local = searchLocal(query);
  if (local.length > 0) return local.map(n => ({ text: n.content, source: 'local' }));
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: 3 })
  });
  // fetch only rejects on network failure; HTTP error statuses must be checked explicitly.
  if (!resp.ok) {
    throw new Error(`Scavio search failed: HTTP ${resp.status}`);
  }
  const data = await resp.json();
  return (data.organic_results || []).map(r => ({ text: r.snippet || '', source: 'web' }));
}
// Demo: store one note before running a search against it.
addNote('My stack', 'Python FastAPI Railway Vercel');
search('what is my tech stack').then(r => r.forEach(x => console.log(`[${x.source}] ${x.text.slice(0, 60)}`)));
Sortie attendue
Added: Python project structure
Added: API design preferences
Added: Deployment checklist
Query: python project structure
[LOCAL] Found 3 relevant notes
Query: latest FastAPI release 2026
[WEB] Local KB insufficient, searching web ($0.005)
A: Based on your personal notes, you prefer the src/ layout with
pyproject.toml for Python projects [1]. Tests should go in a tests/
directory at the root level [1].
Sources: ['local', 'local', 'local'], Cost: $0.0
Total notes in KB: 5