Le RAG basé sur la recherche remplace le magasin de vecteurs par un appel en direct à une API de recherche. Au lieu de générer des embeddings de vos documents et d'effectuer une recherche par similarité, vous interrogez le web (ou une plateforme donnée) pour obtenir du contenu pertinent, puis vous injectez les résultats directement dans le contexte du LLM.
Prérequis
- Python 3.9+
- Clé API Scavio
- SDK Python anthropic et bibliothèque requests (pip install anthropic requests)
Parcours
Étape 1: Comprendre la différence de modèle
RAG traditionnel : embedding des documents -> stockage des vecteurs -> recherche par similarité -> injection. RAG par recherche : requête API -> obtention des extraits -> injection. Pas d'étape d'embedding, pas de base de vecteurs.
# Traditional vector RAG (what you're replacing)
# 1. Embed your corpus (expensive, slow)
# 2. Store in Pinecone/Chroma/Weaviate
# 3. Embed the query
# 4. Cosine similarity search
# 5. Retrieve top-k chunks
# 6. Inject into prompt
# Search RAG (this tutorial)
# 1. Convert question to search query
# 2. Call search API (1 API call, 1 credit)
# 3. Extract top snippets
# 4. Inject into prompt
# Done. Fresh data. No embedding costs.

Étape 2: Construire la fonction de récupération
Le récupérateur prend une question, recherche du contenu pertinent et renvoie des extraits formatés.
import requests
# Placeholder value sent as the `x-api-key` header by retrieve(); replace it
# with a real Scavio API key and keep real keys out of version control.
SCAVIO_KEY = "your-scavio-api-key"
def retrieve(question: str, num_results: int = 5, platform: str = None) -> list[dict]:
payload = {"query": question, "num_results": num_results}
if platform:
payload["platform"] = platform
r = requests.post(
"https://api.scavio.dev/api/v1/search",
json=payload,
headers={"x-api-key": SCAVIO_KEY},
timeout=15
)
r.raise_for_status()
results = r.json().get("organic_results", [])
return [{"title": res["title"], "snippet": res.get("snippet", ""), "url": res["link"]} for res in results]Étape 3: Formater les documents récupérés en tant que contexte
Convertir les résultats de recherche en un bloc de contexte que le LLM peut référencer.
def format_context(docs: list[dict]) -> str:
lines = []
for i, doc in enumerate(docs, 1):
lines.append(f"[{i}] {doc['title']}\nURL: {doc['url']}\n{doc['snippet']}")
return "\n---\n".join(lines)Étape 4: Générer une réponse avec Anthropic Claude
Injecter le contexte récupéré dans le prompt et obtenir une réponse fondée.
import anthropic
# Placeholder value passed to anthropic.Anthropic(api_key=...); replace it
# with a real key and keep real keys out of version control.
ANTHROPIC_KEY = "your-anthropic-key"
def rag_answer(question: str, platform: str = None) -> dict:
    """Answer *question* with Claude, grounded in live search results.

    Args:
        question: The user's question; also used as the search query.
        platform: Optional platform filter forwarded to ``retrieve``.

    Returns:
        A dict with ``answer`` (the model's text) and ``sources`` (the URLs
        of the retrieved documents, in citation order).
    """
    docs = retrieve(question, num_results=5, platform=platform)
    context = format_context(docs)
    # Built from explicit pieces so the prompt text does not depend on the
    # indentation of this function body.
    prompt = (
        "Use the following search results to answer the question. Cite sources by number.\n"
        f"{context}\n"
        f"Question: {question}\n"
        "Answer:"
    )
    client = anthropic.Anthropic(api_key=ANTHROPIC_KEY)
    msg = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    # Join all text blocks instead of assuming content[0] exists and is text;
    # an empty or non-text first block would otherwise raise.
    answer = "".join(
        block.text for block in msg.content if getattr(block, "type", None) == "text"
    )
    return {"answer": answer, "sources": [d["url"] for d in docs]}
result = rag_answer("What are the latest AI models released in 2026?")
print(result["answer"])
print("\nSources:", result["sources"])Exemple Python
import requests
import anthropic
# Placeholder credentials: SCAVIO_KEY is sent as the `x-api-key` header by
# retrieve(), ANTHROPIC_KEY is passed to anthropic.Anthropic(). Replace both
# with real keys and keep real keys out of version control.
SCAVIO_KEY = "your-scavio-api-key"
ANTHROPIC_KEY = "your-anthropic-key"
def retrieve(question: str, n: int = 5, platform: str = None) -> list[dict]:
    """Query the Scavio search API and return normalized result snippets.

    Args:
        question: Text sent as the search query.
        n: Number of results requested from the API (sent as ``num_results``).
        platform: Optional platform filter; left out of the request when falsy.

    Returns:
        One dict per organic result with the keys ``title``, ``snippet``
        and ``url``. Fields missing from a result become empty strings.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    payload = {"query": question, "num_results": n}
    if platform:
        payload["platform"] = platform
    response = requests.post(
        "https://api.scavio.dev/api/v1/search",
        json=payload,
        headers={"x-api-key": SCAVIO_KEY},
        timeout=15,
    )
    response.raise_for_status()
    # `or []` also covers a payload where the key is present but null.
    results = response.json().get("organic_results") or []
    # Read every field with .get() so a single incomplete result cannot
    # abort the whole retrieval with a KeyError.
    return [
        {
            "title": d.get("title", ""),
            "snippet": d.get("snippet", ""),
            "url": d.get("link", ""),
        }
        for d in results
    ]
def format_context(docs: list[dict]) -> str:
    """Render retrieved documents as a numbered context block for the prompt.

    Args:
        docs: Dicts carrying the keys ``title``, ``snippet`` and ``url``.

    Returns:
        One entry per document (title, snippet, URL on separate lines),
        numbered from 1 so the model can cite sources by number, with
        entries separated by a ``---`` line. An empty list yields ``""``.
    """
    return "\n---\n".join(
        f"[{i}] {d['title']}\n{d['snippet']}\n{d['url']}"
        for i, d in enumerate(docs, 1)
    )
def rag_answer(question: str, platform: str = None) -> dict:
    """Answer *question* with Claude, grounded in live search results.

    Args:
        question: The user's question; also used as the search query.
        platform: Optional platform filter forwarded to ``retrieve``.

    Returns:
        A dict with ``answer`` (the model's text) and ``sources`` (the URLs
        of the retrieved documents, in citation order).
    """
    docs = retrieve(question, n=5, platform=platform)
    context = format_context(docs)
    prompt = f"Use these search results to answer. Cite source numbers.\n\n{context}\n\nQuestion: {question}\nAnswer:"
    client = anthropic.Anthropic(api_key=ANTHROPIC_KEY)
    msg = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    # Join all text blocks instead of assuming content[0] exists and is text;
    # an empty or non-text first block would otherwise raise.
    answer = "".join(
        block.text for block in msg.content if getattr(block, "type", None) == "text"
    )
    return {"answer": answer, "sources": [d["url"] for d in docs]}
if __name__ == "__main__":
questions = [
"What are the most popular vector databases in 2026?",
"Latest AI coding assistants compared"
]
for q in questions:
result = rag_answer(q)
print(f"Q: {q}")
print(f"A: {result['answer'][:300]}...\n")Exemple JavaScript
// Placeholder credentials sent as `x-api-key` headers to the Scavio and
// Anthropic endpoints below. Replace with real keys and keep real keys out
// of version control.
const SCAVIO_KEY = 'your-scavio-api-key';
const ANTHROPIC_KEY = 'your-anthropic-key';
/**
 * Query the Scavio search API and return normalized result snippets.
 *
 * @param {string} question - Text sent as the search query.
 * @param {number} [n=5] - Number of results requested (sent as `num_results`).
 * @param {?string} [platform=null] - Optional platform filter; omitted when falsy.
 * @returns {Promise<Array<{title: string, snippet: string, url: string}>>}
 * @throws {Error} If the API answers with a non-2xx status or the request times out.
 */
async function retrieve(question, n = 5, platform = null) {
  const payload = { query: question, num_results: n };
  if (platform) payload.platform = platform;
  const res = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'x-api-key': SCAVIO_KEY },
    body: JSON.stringify(payload),
    // Same 15 s budget as the Python example's `timeout=15`.
    signal: AbortSignal.timeout(15000)
  });
  // fetch() resolves on HTTP errors too; fail loudly like raise_for_status().
  if (!res.ok) {
    throw new Error(`Scavio search failed: ${res.status} ${res.statusText}`);
  }
  const data = await res.json();
  return (data.organic_results ?? []).map(d => ({
    title: d.title ?? '',
    snippet: d.snippet ?? '',
    url: d.link ?? ''
  }));
}
/**
 * Render retrieved documents as a numbered context block for the prompt.
 *
 * @param {Array<{title: string, snippet: string, url: string}>} docs
 * @returns {string} Entries numbered from 1, separated by a `---` line.
 */
function formatContext(docs) {
  const separator = '\n---\n';
  const renderEntry = (doc, index) => {
    const position = index + 1;
    return `[${position}] ${doc.title}\n${doc.snippet}\n${doc.url}`;
  };
  return docs.map(renderEntry).join(separator);
}
/**
 * Answer a question with Claude, grounded in live search results.
 *
 * @param {string} question - The user's question; also used as the search query.
 * @param {?string} [platform=null] - Optional platform filter forwarded to retrieve().
 * @returns {Promise<{answer: string, sources: string[]}>} The model's text and
 *   the URLs of the retrieved documents, in citation order.
 * @throws {Error} If the Anthropic API answers with a non-2xx status.
 */
async function ragAnswer(question, platform = null) {
  const docs = await retrieve(question, 5, platform);
  const context = formatContext(docs);
  const prompt = `Use these search results to answer. Cite source numbers.\n\n${context}\n\nQuestion: ${question}\nAnswer:`;
  const res = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'x-api-key': ANTHROPIC_KEY, 'anthropic-version': '2023-06-01' },
    body: JSON.stringify({ model: 'claude-sonnet-4-6', max_tokens: 1024, messages: [{ role: 'user', content: prompt }] })
  });
  // Without this check an API error body (which has no `content` array)
  // would surface as an opaque TypeError further down.
  if (!res.ok) {
    throw new Error(`Anthropic request failed: ${res.status} ${await res.text()}`);
  }
  const msg = await res.json();
  // Join all text blocks instead of assuming content[0] exists and is text.
  const answer = (msg.content ?? [])
    .filter(block => block.type === 'text')
    .map(block => block.text)
    .join('');
  return { answer, sources: docs.map(d => d.url) };
}
const result = await ragAnswer('What are the most popular vector databases in 2026?');
console.log(result.answer);Sortie attendue
Based on the search results, the most popular vector databases in 2026 include:
1. Pinecone - serverless, widely used in production [1]
2. Weaviate - open source with hybrid search [2]
3. Qdrant - performance-focused, Rust-based [3]
4. Chroma - popular for local development [4]
5. pgvector - PostgreSQL extension for teams already using Postgres [5]
Sources: https://pinecone.io, https://weaviate.io, ...