LangChain RAG pipelines commonly use TavilySearchResults as the web search retriever. After Tavily's acquisition by Nebius, switching to an independent provider reduces vendor risk. Scavio drops into the same pipeline slot: define a custom retriever or tool with the same interface, and your chain continues working. This tutorial shows the exact code changes for LangChain RetrievalQA, create_retrieval_chain, and agent-based RAG patterns.
Prerequisites
- Python 3.9+ installed
- langchain and langchain-core packages installed
- A Scavio API key from scavio.dev
- An LLM API key (OpenAI or Anthropic)
Walkthrough
Step 1: Create the Scavio search retriever
Build a LangChain-compatible retriever that calls the Scavio API. It returns Document objects with page_content and metadata, matching what TavilySearchAPIRetriever provides.
import os, requests
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from typing import List
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
class ScavioRetriever(BaseRetriever):
    """LangChain retriever backed by the Scavio web search API.

    Each query is POSTed to the Scavio search endpoint, and every entry of the
    response's ``organic_results`` list is converted into one ``Document``
    whose ``page_content`` is the title followed by the snippet.
    """

    # Sent in the ``x-api-key`` header; an empty value falls back to SCAVIO_KEY.
    api_key: str = ''
    # Sent as ``num_results`` in the request body.
    num_results: int = 5
    # Seconds to wait for the API. requests applies no timeout unless one is
    # passed, so without this a stalled connection would block forever.
    request_timeout: float = 30.0

    def __init__(self, **kwargs):
        """Initialise the fields, defaulting ``api_key`` to SCAVIO_KEY."""
        super().__init__(**kwargs)
        self.api_key = self.api_key or SCAVIO_KEY

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Run one Scavio search and return its organic results as documents.

        Args:
            query: Search query forwarded to the API unchanged.

        Returns:
            One ``Document`` per organic result, with ``source`` (the result
            link) and ``title`` in its metadata. Empty when there are no
            organic results.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
            requests.Timeout: If no response arrives within
                ``request_timeout`` seconds.
        """
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers={'x-api-key': self.api_key, 'Content-Type': 'application/json'},
            json={'query': query, 'country_code': 'us', 'num_results': self.num_results},
            timeout=self.request_timeout,
        )
        resp.raise_for_status()
        docs = []
        # ``or []`` also covers a response that carries ``organic_results: null``.
        for r in resp.json().get('organic_results') or []:
            # Read every field with .get so one sparse result cannot abort the
            # whole retrieval with a KeyError.
            title = r.get('title', '')
            docs.append(Document(
                page_content=f"{title}\n{r.get('snippet', '')}",
                metadata={'source': r.get('link', ''), 'title': title}
            ))
        return docs
retriever = ScavioRetriever(num_results=5)
docs = retriever.invoke('LangChain RAG tutorial 2026')
for d in docs:
    print(f'{d.metadata["title"][:50]}\n {d.metadata["source"]}')
Step 2: Replace TavilySearchResults in your chain
Swap TavilySearchResults for a Scavio tool in agent-based RAG. The tool wrapper maintains the same interface so your agent config stays unchanged.
from langchain_core.tools import tool
@tool
def web_search(query: str) -> str:
    """Search the web for current information. Returns relevant results with titles, snippets, and URLs."""
    # The docstring above doubles as the tool description that @tool exposes
    # to the agent, so it is kept short and unchanged; notes live in comments.
    #
    # Raises requests.HTTPError on a 4xx/5xx answer and requests.Timeout when
    # the API does not respond within 30 seconds.
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 5},
        timeout=30,  # requests applies no timeout unless one is passed
    )
    # Fail loudly on HTTP errors instead of handing the agent an empty string,
    # matching what ScavioRetriever does.
    resp.raise_for_status()
    results = resp.json().get('organic_results') or []
    # One "Title / Content / Source" paragraph per hit, separated by blank lines.
    return '\n\n'.join(
        f'Title: {r.get("title", "")}\nContent: {r.get("snippet", "")}\nSource: {r.get("link", "")}'
        for r in results
    )
# BEFORE: the agent's tool list held the Tavily search tool.
# from langchain_community.tools.tavily_search import TavilySearchResults
# tools = [TavilySearchResults(max_results=5)]
# AFTER: the tool list holds the Scavio-backed web_search tool instead.
tools = [web_search]
# Call the tool directly to check it; web_search returns one formatted string.
result = web_search.invoke('LangChain vs LlamaIndex 2026')
print(result[:300])
Step 3: Update a RetrievalQA chain
If you use RetrievalQA with Tavily as the retriever, swap in the ScavioRetriever. The chain interface stays identical.
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
# BEFORE: the chain's retriever was the Tavily retriever.
# from langchain_community.retrievers import TavilySearchAPIRetriever
# retriever = TavilySearchAPIRetriever(k=5)
# AFTER: construct the Scavio retriever under the same variable name.
retriever = ScavioRetriever(num_results=5)
# The rest of your chain stays exactly the same
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    # Ask for the retrieved documents too; they are read back from
    # result['source_documents'] when the sources are printed.
    return_source_documents=True
)
# Run a query: the question goes in under 'query', the answer comes back under 'result'.
result = qa_chain.invoke({'query': 'What are the best RAG frameworks in 2026?'})
# Show only the first 200 characters of the answer.
print(f'Answer: {result["result"][:200]}')
print(f'\nSources:')
for doc in result['source_documents'][:3]:
    print(f' - {doc.metadata["source"]}')
Step 4: Add multi-platform retrieval for richer RAG
Go beyond what Tavily offered by adding Reddit and YouTube sources to your RAG pipeline. This gives the LLM community opinions and video references alongside web results.
class MultiPlatformRetriever(BaseRetriever):
    """Retriever that runs one Scavio search per platform and merges the hits.

    Platforms present in the internal site map ('reddit', 'youtube') are
    searched by prefixing the query with a ``site:`` filter; any other platform
    name (such as 'google') sends the query unchanged. Each document is tagged
    with the platform it came from.
    """

    # Sent in the ``x-api-key`` header; an empty value falls back to SCAVIO_KEY.
    api_key: str = ''
    # Platforms to query, in order; one API request is made per entry.
    platforms: list = ['google', 'reddit']
    # Sent as ``num_results`` for every per-platform request.
    results_per_platform: int = 3
    # Seconds to wait for each request. requests applies no timeout unless one
    # is passed, so without this a stalled connection would block forever.
    request_timeout: float = 30.0

    def __init__(self, **kwargs):
        """Initialise the fields, defaulting ``api_key`` to SCAVIO_KEY."""
        super().__init__(**kwargs)
        self.api_key = self.api_key or SCAVIO_KEY

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Search every configured platform and return the combined documents.

        Args:
            query: Search query; a ``site:`` prefix is added for platforms in
                the site map.

        Returns:
            Documents in platform order. ``page_content`` starts with the
            upper-cased platform tag in brackets, and metadata carries
            ``source``, ``title`` and ``platform``.

        Raises:
            requests.HTTPError: If any request gets a 4xx/5xx answer.
            requests.Timeout: If a request exceeds ``request_timeout`` seconds.
        """
        docs = []
        site_map = {'reddit': 'reddit.com', 'youtube': 'youtube.com'}
        for platform in self.platforms:
            # Platforms without a site-map entry fall through to a plain search.
            q = f'site:{site_map[platform]} {query}' if platform in site_map else query
            resp = requests.post(
                'https://api.scavio.dev/api/v1/search',
                headers={'x-api-key': self.api_key, 'Content-Type': 'application/json'},
                json={'query': q, 'country_code': 'us', 'num_results': self.results_per_platform},
                timeout=self.request_timeout,
            )
            # Surface HTTP errors rather than treating them as "no results".
            resp.raise_for_status()
            # ``or []`` also covers a response that carries ``organic_results: null``.
            for r in resp.json().get('organic_results') or []:
                # .get keeps one sparse result from aborting the whole retrieval.
                title = r.get('title', '')
                docs.append(Document(
                    page_content=f'[{platform.upper()}] {title}\n{r.get("snippet", "")}',
                    metadata={'source': r.get('link', ''), 'title': title, 'platform': platform}
                ))
        return docs
# Search Google, Reddit and YouTube through a single retriever call.
retriever = MultiPlatformRetriever(platforms=['google', 'reddit', 'youtube'])
docs = retriever.invoke('best python web framework 2026')
# One line per hit: the platform tag, then the first 50 characters of the title.
for d in docs:
    meta = d.metadata
    print(f'[{meta["platform"]}] {meta["title"][:50]}')
print(f'\nCost: {len(set(d.metadata["platform"] for d in docs))} API calls = ${len(set(d.metadata["platform"] for d in docs)) * 0.005:.3f}')
Python Example
import os, requests
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from typing import List
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
class ScavioRetriever(BaseRetriever):
    """Compact LangChain retriever backed by the Scavio web search API.

    Every entry of the response's ``organic_results`` list becomes one
    ``Document`` whose ``page_content`` is the title followed by the snippet.
    """

    # Sent in the ``x-api-key`` header; an empty value falls back to SCAVIO_KEY.
    api_key: str = ''
    # Sent as ``num_results`` in the request body.
    num_results: int = 5
    # Seconds to wait for the API. requests applies no timeout unless one is
    # passed, so without this a stalled connection would block forever.
    request_timeout: float = 30.0

    def __init__(self, **kw):
        """Initialise the fields, defaulting ``api_key`` to SCAVIO_KEY."""
        super().__init__(**kw)
        self.api_key = self.api_key or SCAVIO_KEY

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Run one Scavio search and return its organic results as documents.

        Args:
            query: Search query forwarded to the API unchanged.

        Returns:
            One ``Document`` per organic result, with the result link stored
            under ``source`` in its metadata.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
            requests.Timeout: If no response arrives within
                ``request_timeout`` seconds.
        """
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers={'x-api-key': self.api_key, 'Content-Type': 'application/json'},
            json={'query': query, 'country_code': 'us', 'num_results': self.num_results},
            timeout=self.request_timeout,
        )
        # Surface HTTP errors instead of parsing an error body as results.
        resp.raise_for_status()
        # ``or []`` covers a null ``organic_results``; .get keeps one sparse
        # result from aborting the whole retrieval with a KeyError.
        return [
            Document(
                page_content=f"{r.get('title', '')}\n{r.get('snippet', '')}",
                metadata={'source': r.get('link', '')},
            )
            for r in resp.json().get('organic_results') or []
        ]
retriever = ScavioRetriever(num_results=5)
docs = retriever.invoke('RAG frameworks 2026')
for d in docs:
    print(f"{d.page_content[:60]}\n {d.metadata['source']}")
JavaScript Example
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
/**
 * Search the web through the Scavio API and shape the hits for a RAG pipeline.
 *
 * @param {string} query - Search query sent to the API unchanged.
 * @param {number} [num=5] - Sent as `num_results` in the request body.
 * @returns {Promise<Array<{pageContent: string, metadata: {source: string, title: string}}>>}
 *   One entry per organic result; an empty array when there are none.
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function searchForRAG(query, num = 5) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: num })
  });
  // fetch only rejects on network failure, so an HTTP error status has to be
  // checked explicitly or an error body would be parsed as a result set.
  if (!resp.ok) {
    throw new Error(`Scavio search failed: ${resp.status} ${resp.statusText}`);
  }
  const data = await resp.json();
  return (data.organic_results || []).map(r => ({
    pageContent: `${r.title}\n${r.snippet || ''}`,
    metadata: { source: r.link, title: r.title }
  }));
}
searchForRAG('RAG frameworks 2026').then(docs => {
docs.forEach(d => console.log(`${d.pageContent.slice(0, 60)}\n ${d.metadata.source}`));
});
Expected Output
LangChain RAG Tutorial: Complete Guide 2026
https://example.com/langchain-rag-tutorial
Building Production RAG with LangChain
https://docs.langchain.com/rag-guide
[google] Best RAG Frameworks Comparison 2026
[reddit] r/LangChain - RAG framework recommendations 2026
[youtube] RAG Tutorial: LangChain vs LlamaIndex
Cost: 3 API calls = $0.015