RAG pipelines fail silently when the vector store does not contain relevant documents: the LLM receives poor context and generates a plausible-sounding but wrong answer. A live search fallback catches these failures by checking retrieval confidence and routing to web search when the vector store comes up short. This tutorial adds a fallback layer to any existing RAG pipeline that detects low-quality retrieval and transparently switches to live SERP data. The fallback triggers only when needed, keeping costs minimal at $0.005 per search call.
Prerequisites
- Python 3.9+ installed
- An existing RAG pipeline with a vector store
- requests library installed
- A Scavio API key from scavio.dev
Walkthrough
Step 1: Build the confidence scoring function
Score how well your vector retrieval results match the query. Low scores trigger the search fallback. Use similarity scores from your vector store or a simple heuristic.
def score_retrieval_quality(query: str, documents: list, scores: 'list[float] | None' = None) -> float:
    """Score retrieval quality from 0 (terrible) to 1 (excellent).

    Args:
        query: The user's query string.
        documents: Retrieved document texts.
        scores: Optional similarity scores from the vector store. When
            provided, their average (clamped into [0, 1]) is the confidence.
            NOTE(review): assumes higher-is-better similarity — stores that
            return distances (lower-is-better) must be inverted first.

    Returns:
        Confidence in [0, 1]; 0.0 when nothing was retrieved.
    """
    if not documents:
        return 0.0
    # If the vector store provides similarity scores, trust them directly.
    if scores:
        avg_score = sum(scores) / len(scores)
        # Clamp BOTH ends: raw similarities can fall outside [0, 1]
        # (e.g. negative cosine similarity), which would otherwise break
        # the documented 0-1 contract and produce a negative confidence.
        return max(0.0, min(avg_score, 1.0))
    # Heuristic fallback: fraction of query words present in each doc's
    # first 200 words, averaged across all docs.
    query_words = set(query.lower().split())
    total_overlap = 0.0
    for doc in documents:
        doc_words = set(doc.lower().split()[:200])  # cap per-doc work
        total_overlap += len(query_words & doc_words) / max(len(query_words), 1)
    avg_overlap = total_overlap / len(documents)
    return min(avg_overlap, 1.0)
# Example:
score = score_retrieval_quality(
'latest python version 2026',
['Python 3.12 was released in October 2023 with improved performance.']
)
print(f'Retrieval confidence: {score:.2f}') # Low because doc is outdatedStep 2: Build the search fallback function
When retrieval confidence is below a threshold, fetch live search results and format them as documents for the LLM context.
import os

import requests

API_KEY = os.environ['SCAVIO_API_KEY']

def search_fallback(query: str, k: int = 5) -> list:
    """Fetch live search results as fallback documents.

    Args:
        query: Search query to send to the SERP API.
        k: Maximum number of results to return.

    Returns:
        Up to k dicts with 'content', 'source', and 'retriever' keys,
        shaped like retrieved documents for the LLM context.

    Raises:
        requests.HTTPError: When the API responds with an error status.
        requests.Timeout: When the API does not answer within 10 seconds.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us'},
        timeout=10,  # never let a hung search call stall the whole RAG pipeline
    )
    resp.raise_for_status()
    results = resp.json().get('organic_results', [])[:k]
    return [{
        'content': f'{r["title"]}\n{r.get("snippet", "")}',
        'source': r['link'],
        'retriever': 'live_search'
    } for r in results]
# Test:
fallback_docs = search_fallback('latest python version 2026')
for doc in fallback_docs[:2]:
print(f'[{doc["retriever"]}] {doc["content"][:80]}...')Step 3: Build the fallback-aware retrieval function
Wrap your existing vector retrieval with the confidence check and fallback logic. This is the only function you need to change in your pipeline.
CONFIDENCE_THRESHOLD = 0.3 # below this, trigger fallback
def retrieve_with_fallback(query: str, vector_store, k: int = 5) -> dict:
"""Retrieve from vector store; fall back to search if confidence is low."""
# Step 1: Try vector retrieval
vector_results = vector_store.similarity_search_with_score(query, k=k)
docs = [doc.page_content for doc, score in vector_results]
scores = [score for doc, score in vector_results]
confidence = score_retrieval_quality(query, docs, scores)
# Step 2: Decide retrieval strategy
if confidence >= CONFIDENCE_THRESHOLD and docs:
return {
'documents': [{'content': d, 'retriever': 'vector'} for d in docs],
'strategy': 'vector',
'confidence': round(confidence, 3),
'search_cost': 0
}
# Step 3: Fallback to live search
search_docs = search_fallback(query, k=k)
return {
'documents': search_docs + [{'content': d, 'retriever': 'vector'} for d in docs[:2]],
'strategy': 'search_fallback',
'confidence': round(confidence, 3),
'search_cost': 0.005
}Step 4: Integrate into your existing RAG chain
Replace your current retrieval step with the fallback-aware version. The rest of your pipeline (prompt building, LLM call, output parsing) stays the same.
def rag_with_fallback(query: str, vector_store, llm) -> dict:
    """Run the full RAG pipeline with the search fallback in the retrieval step.

    Args:
        query: User question.
        vector_store: Vector store exposing similarity_search_with_score.
        llm: Callable that takes a prompt string and returns answer text.

    Returns:
        Dict with 'answer', 'strategy', 'confidence', 'sources', 'cost'.
    """
    # Retrieve with fallback
    retrieval = retrieve_with_fallback(query, vector_store)
    documents = retrieval['documents']
    # Build context
    context = '\n\n'.join(d['content'] for d in documents)
    # BUG FIX: the previous `d.get('source', 'vector store')` default was dead
    # code — the `if d.get('source')` filter already drops docs without a
    # source, so the fallback string could never appear. List real URLs only.
    sources = [d['source'] for d in documents if d.get('source')]
    # Generate answer
    prompt = f"""Answer based on the following context. If using web results, cite the URLs.
Context:
{context}
Question: {query}
Answer:"""
    # llm is assumed to be a plain callable that returns text
    answer = llm(prompt)
    return {
        'answer': answer,
        'strategy': retrieval['strategy'],
        'confidence': retrieval['confidence'],
        'sources': sources,
        'cost': retrieval['search_cost']
    }
# Usage stays the same as before:
# result = rag_with_fallback(user_query, my_vector_store, my_llm)
# print(result['answer'])
# print(f'Strategy: {result["strategy"]}, Cost: ${result["cost"]}')

Step 5: Monitor fallback rates and costs
Track how often the fallback triggers to understand your vector store's coverage gaps and plan improvements.
from collections import defaultdict

# Running tally of query counts, chosen strategies, and search spend.
fallback_stats = defaultdict(int)

def tracked_rag(query: str, vector_store, llm) -> dict:
    """Answer a query via rag_with_fallback while recording strategy stats.

    Side effects: increments 'total', the chosen strategy key, and
    'total_cost' in the module-level fallback_stats tally.
    """
    result = rag_with_fallback(query, vector_store, llm)
    fallback_stats['total'] += 1
    fallback_stats[result['strategy']] += 1
    fallback_stats['total_cost'] += result['cost']
    return result

def print_fallback_report():
    """Print how often retrieval stayed on vectors vs. fell back to search."""
    total = fallback_stats['total']
    if not total:
        print('No queries tracked yet.')
        return

    def pct(key: str) -> float:
        # Share of tracked queries served by the given strategy, in percent.
        return fallback_stats.get(key, 0) / total * 100

    print('RAG Fallback Report:')
    print(f' Total queries: {total}')
    print(f' Vector retrieval: {pct("vector"):.0f}%')
    print(f' Search fallback: {pct("search_fallback"):.0f}%')
    print(f' Total search cost: ${fallback_stats["total_cost"]:.2f}')
    print(f' Avg cost/query: ${fallback_stats["total_cost"] / total:.4f}')
    if pct('search_fallback') > 30:
        print(' NOTE: High fallback rate. Consider adding more documents to your vector store.')
# After running many queries:
# print_fallback_report()

Python Example
import os

import requests

API_KEY = os.environ['SCAVIO_API_KEY']

def search_fallback(query, k=5):
    """Fetch up to k live search results shaped as context documents."""
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us'},
        timeout=10,  # avoid hanging the pipeline on a slow API
    )
    # Surface API errors instead of silently parsing an error payload as
    # results (matches the Step 2 version's raise_for_status).
    resp.raise_for_status()
    return [{'content': f'{r["title"]}\n{r.get("snippet", "")}', 'source': r['link']}
            for r in resp.json().get('organic_results', [])[:k]]
def retrieve(query, vector_docs, confidence):
    """Return vector docs when confidence clears 0.3; otherwise blend in live search."""
    confident = confidence >= 0.3 and bool(vector_docs)
    if confident:
        return {'docs': vector_docs, 'strategy': 'vector', 'cost': 0}
    live_docs = search_fallback(query)
    return {'docs': live_docs + vector_docs[:2], 'strategy': 'fallback', 'cost': 0.005}
# Simulate low-confidence retrieval:
result = retrieve('Python 3.14 release date 2026', ['Python 3.12 docs...'], 0.15)
print(f'Strategy: {result["strategy"]}, docs: {len(result["docs"])}, cost: ${result["cost"]}')JavaScript Example
const API_KEY = process.env.SCAVIO_API_KEY;

/**
 * Fetch up to k live search results shaped as context documents.
 * @param {string} query - Search query for the SERP API.
 * @param {number} [k=5] - Maximum number of results.
 * @returns {Promise<Array<{content: string, source: string}>>}
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function searchFallback(query, k = 5) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': API_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us' })
  });
  // Mirror the Python version's raise_for_status(): fail loudly on API
  // errors instead of parsing an error payload as if it were results.
  if (!resp.ok) {
    throw new Error(`Search API error: ${resp.status}`);
  }
  const data = await resp.json();
  return (data.organic_results || []).slice(0, k)
    .map(r => ({ content: `${r.title}\n${r.snippet || ''}`, source: r.link }));
}
/**
 * Use vector docs when confidence clears the 0.3 bar; otherwise put live
 * search results ahead of the top two vector docs.
 */
async function retrieve(query, vectorDocs, confidence) {
  const confident = confidence >= 0.3 && vectorDocs.length > 0;
  if (confident) {
    return { docs: vectorDocs, strategy: 'vector', cost: 0 };
  }
  const liveDocs = await searchFallback(query);
  return { docs: [...liveDocs, ...vectorDocs.slice(0, 2)], strategy: 'fallback', cost: 0.005 };
}
retrieve('Python 3.14 release 2026', ['old docs'], 0.1)
.then(r => console.log(`Strategy: ${r.strategy}, docs: ${r.docs.length}`));Expected Output
Retrieval confidence: 0.12
[live_search] Python Release Python 3.14.0 -- Python 3.14.0 was released on...
[live_search] What's New In Python 3.14 -- This article explains the new...
Strategy: search_fallback, docs: 7, cost: $0.005
RAG Fallback Report:
Total queries: 100
Vector retrieval: 72%
Search fallback: 28%
Total search cost: $0.14
Avg cost/query: $0.0014