RAG pipelines built on static vector stores answer questions from stale data. Adding live search grounding means the LLM always has access to current information when the vector store falls short. This tutorial builds a hybrid retriever that checks the vector store first, then falls back to live search when confidence is low. The search grounding layer uses Scavio to pull from Google, Reddit, and YouTube at $0.005 per query.
Prerequisites
- Python 3.9+ installed
- langchain, langchain-openai, and faiss-cpu installed
- A Scavio API key from scavio.dev
- An OpenAI API key for the LLM
Walkthrough
Step 1: Build the search grounding retriever
Create a retriever that searches the web for real-time context. Unlike a vector store, this always returns current information.
# Stdlib + LangChain imports for the search-grounding retriever.
import os, requests
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from typing import List
# Fail fast with KeyError if SCAVIO_API_KEY is not set in the environment.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
class SearchGroundingRetriever(BaseRetriever):
    """Retriever that grounds queries in live web-search results.

    Calls the Scavio search API and wraps each organic result in a
    ``Document`` tagged ``type='search_grounding'`` so downstream code can
    tell live results apart from vector-store hits.
    """

    # Pydantic fields (BaseRetriever is a pydantic model).
    api_key: str = ''       # falls back to the SCAVIO_API_KEY env var
    num_results: int = 5    # results requested per query

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Resolve the key after pydantic init so an explicit kwarg wins.
        self.api_key = self.api_key or SCAVIO_KEY

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Return up to ``num_results`` live search results for ``query``.

        Raises:
            requests.HTTPError: if the API returns a non-2xx status.
            requests.Timeout: if the API does not answer within 15 seconds.
        """
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers={'x-api-key': self.api_key,
                     'Content-Type': 'application/json'},
            json={'query': query, 'country_code': 'us',
                  'num_results': self.num_results},
            # Bug fix: the original had no timeout, so a dead endpoint
            # would hang the whole retrieval chain indefinitely.
            timeout=15,
        )
        resp.raise_for_status()
        return [Document(
            page_content=f"{r['title']}\n{r.get('snippet', '')}",
            metadata={'source': r['link'], 'type': 'search_grounding'}
        ) for r in resp.json().get('organic_results', [])]
grounding = SearchGroundingRetriever(num_results=5)
docs = grounding.invoke('latest LangChain features 2026')
print(f'Grounding returned {len(docs)} documents')
for d in docs:
print(f' {d.page_content[:60]}')Step 2: Build the hybrid retriever with fallback logic
Combine vector store retrieval with search grounding. If the vector store returns low-relevance results (short snippets, few matches), automatically supplement with live search.
from langchain_core.retrievers import BaseRetriever
class HybridGroundedRetriever(BaseRetriever):
    """Retriever that prefers the vector store, falling back to live search.

    If the vector store yields fewer than ``min_vector_results`` documents
    of at least ``min_content_length`` characters, results are supplemented
    with live search grounding and de-duplicated.
    """

    vector_retriever: BaseRetriever = None   # primary source; may be None
    search_retriever: BaseRetriever = None   # live-search fallback; may be None
    min_vector_results: int = 2              # fewer quality hits triggers grounding
    min_content_length: int = 50             # shorter docs don't count as "quality"

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Return vector-store docs when sufficient, else merge in search docs."""
        # Try the (cheap, local) vector store first.
        vector_docs = []
        if self.vector_retriever:
            vector_docs = self.vector_retriever.invoke(query)
        # Keep only results long enough to be useful context.
        quality_docs = [d for d in vector_docs
                        if len(d.page_content) >= self.min_content_length]
        if len(quality_docs) >= self.min_vector_results:
            return quality_docs
        # Bug fix: the original called self.search_retriever.invoke without a
        # None check, raising AttributeError when only a vector retriever was
        # configured. Degrade gracefully to whatever the vector store gave us.
        if self.search_retriever is None:
            return quality_docs
        # Supplement with live search grounding.
        search_docs = self.search_retriever.invoke(query)
        # Merge: vector docs first, de-duplicating on a 50-char content prefix.
        seen_content = set(d.page_content[:50] for d in quality_docs)
        for sd in search_docs:
            if sd.page_content[:50] not in seen_content:
                quality_docs.append(sd)
                seen_content.add(sd.page_content[:50])
        return quality_docs
# Setup
hybrid = HybridGroundedRetriever(
search_retriever=SearchGroundingRetriever(num_results=5),
min_vector_results=2
)
docs = hybrid.invoke('latest Python release date 2026')
print(f'Hybrid returned {len(docs)} docs')
for d in docs:
source_type = d.metadata.get('type', 'vector')
print(f' [{source_type}] {d.page_content[:50]}')Step 3: Wire into a LangChain QA chain
Connect the hybrid retriever to a RetrievalQA chain. The chain automatically gets grounded answers when the vector store lacks current data.
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
# Deterministic, low-cost model for answering over retrieved context.
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)
# 'stuff' packs all retrieved docs into one prompt; source documents are
# returned so callers can tell grounded answers from vector-only ones.
# Fix: dropped the original chain_type_kwargs={'prompt': None} — passing
# None just selects the default prompt, so the kwargs were dead config.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=hybrid,
    return_source_documents=True,
)
def ask(question: str) -> dict:
    """Answer *question* through the QA chain and report grounding metadata.

    Returns a dict with 'answer', 'grounded' (True when any source came
    from live search), 'sources', and 'cost' (search spend in USD).
    """
    result = qa_chain.invoke({'query': question})
    sources = [
        {'type': doc.metadata.get('type', 'vector'),
         'url': doc.metadata.get('source', 'local')}
        for doc in result.get('source_documents', [])
    ]
    grounded = any(src['type'] == 'search_grounding' for src in sources)
    return {
        'answer': result['result'],
        'grounded': grounded,
        'sources': sources,
        'cost': 0.005 if grounded else 0,
    }
result = ask('What are the newest LangChain features in 2026?')
print(f'Answer: {result["answer"][:200]}')
print(f'Grounded: {result["grounded"]}')
print(f'Cost: ${result["cost"]}')
for s in result['sources'][:3]:
print(f' [{s["type"]}] {s["url"]}')Step 4: Add grounding decisions and cost tracking
Track when grounding is triggered and how much it costs. This helps optimize the vector store to reduce unnecessary search calls.
class GroundingTracker:
    """Tracks how often queries needed live-search grounding, and the spend."""

    def __init__(self):
        self.total_queries = 0
        self.grounded_queries = 0
        self.total_cost = 0
        self.grounding_triggers = []

    def record(self, query: str, grounded: bool, cost: float):
        """Record one answered query and, when it was grounded, its cost."""
        self.total_queries += 1
        if not grounded:
            return
        self.grounded_queries += 1
        self.total_cost += cost
        self.grounding_triggers.append(query)

    def report(self) -> str:
        """Render a plain-text summary of grounding frequency and cost."""
        if self.total_queries:
            pct = self.grounded_queries / self.total_queries * 100
        else:
            pct = 0
        vector_only = self.total_queries - self.grounded_queries
        lines = [
            'Grounding Report',
            f'Total queries: {self.total_queries}',
            f'Grounded: {self.grounded_queries} ({pct:.0f}%)',
            f'Vector-only: {vector_only}',
            f'Search cost: ${self.total_cost:.3f}',
            '',
            'Recent grounding triggers:',
        ]
        lines.extend(f' - {q}' for q in self.grounding_triggers[-5:])
        return '\n'.join(lines)
tracker = GroundingTracker()
test_queries = [
'What is a Python decorator?', # Vector store likely has this
'Latest Python 3.15 release date', # Needs grounding
'LangChain v0.4 breaking changes 2026', # Needs grounding
]
for q in test_queries:
result = ask(q)
tracker.record(q, result['grounded'], result['cost'])
print(tracker.report())Python Example
# Standalone copy of the search-grounding example: imports + API key.
import os, requests
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from typing import List
# Fail fast with KeyError if SCAVIO_API_KEY is not set in the environment.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
class SearchGroundingRetriever(BaseRetriever):
    """Retriever that grounds queries in live Scavio web-search results."""

    api_key: str = ''     # falls back to the SCAVIO_API_KEY env var
    num_results: int = 5  # results requested per query

    def __init__(self, **kw):
        super().__init__(**kw)
        # Resolve the key after pydantic init so an explicit kwarg wins.
        self.api_key = self.api_key or SCAVIO_KEY

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Return up to ``num_results`` live search results as Documents."""
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers={'x-api-key': self.api_key,
                     'Content-Type': 'application/json'},
            json={'query': query, 'country_code': 'us',
                  'num_results': self.num_results},
            # Bug fix: no timeout meant a dead endpoint hung the chain.
            timeout=15,
        )
        # Bug fix: without this, a non-2xx error body was silently parsed
        # as if it were a result set (yielding zero documents at best).
        resp.raise_for_status()
        return [Document(
            page_content=f"{r['title']}\n{r.get('snippet','')}",
            # 'type' tag added for consistency with the main tutorial, so
            # callers can distinguish grounded docs from vector-store hits.
            metadata={'source': r['link'], 'type': 'search_grounding'}
        ) for r in resp.json().get('organic_results', [])]
retriever = SearchGroundingRetriever()
docs = retriever.invoke('LangChain RAG grounding 2026')
for d in docs:
print(f"{d.page_content[:60]}\n {d.metadata['source']}")JavaScript Example
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;

/**
 * Fetch live search results from the Scavio API.
 * @param {string} query - search query to ground against the web
 * @param {number} [num=5] - number of organic results to request
 * @returns {Promise<Array<{pageContent: string, metadata: object}>>}
 * @throws {Error} when the API responds with a non-2xx status
 */
async function searchGrounding(query, num = 5) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: num })
  });
  // Bug fix: fetch does not reject on HTTP errors, so the original parsed
  // error payloads as (empty) result sets. Fail loudly instead.
  if (!resp.ok) {
    throw new Error(`Scavio search failed: ${resp.status}`);
  }
  return (await resp.json()).organic_results?.map(r => ({
    pageContent: `${r.title}\n${r.snippet || ''}`,
    metadata: { source: r.link, type: 'search_grounding' }
  })) || [];
}
/**
 * Return the vector-store docs when there are enough of them; otherwise
 * append live search-grounding results after them.
 */
async function hybridRetrieve(query, vectorDocs = []) {
  const enoughLocalContext = vectorDocs.length >= 2;
  if (enoughLocalContext) {
    return vectorDocs;
  }
  const searchDocs = await searchGrounding(query);
  return vectorDocs.concat(searchDocs);
}
hybridRetrieve('LangChain features 2026').then(docs => {
docs.forEach(d => console.log(`[${d.metadata.type}] ${d.pageContent.slice(0, 50)}`));
});Expected Output
Grounding returned 5 documents
Latest LangChain Features and Updates 2026
LangChain v0.4 Release Notes
Hybrid returned 5 docs
[search_grounding] Latest Python 3.15 Released October
Grounding Report
Total queries: 3
Grounded: 2 (67%)
Vector-only: 1
Search cost: $0.010
Recent grounding triggers:
- Latest Python 3.15 release date
- LangChain v0.4 breaking changes 2026