LangChain RAG 管道通常使用 TavilySearchResults 作为网络搜索检索器。在 Tavily 被 Nebius 收购之后,转向独立提供商可以降低供应商风险。Scavio 可以接入管道中的同一位置:只需定义一个接口相同的自定义工具,您的链即可照常工作。本教程展示了 LangChain RetrievalQA、create_retrieval_chain 和基于代理的 RAG 模式所需的具体代码更改。
前置条件
- 已安装 Python 3.9+
- 已安装 langchain、langchain-core、langchain-openai 和 requests 软件包
- 来自 scavio.dev 的 Scavio API 密钥
- LLM API 密钥(OpenAI 或 Anthropic)
操作指南
步骤 1: 创建 Scavio 搜索检索器
构建一个调用 Scavio API 的与 LangChain 兼容的检索器。它返回带有 page_content 和元数据的 Document 对象,与 TavilySearchResults 提供的内容相匹配。
import os, requests
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from typing import List
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
class ScavioRetriever(BaseRetriever):
    """LangChain retriever that fetches web results from the Scavio search endpoint.

    Each entry in the response's ``organic_results`` list becomes one
    ``Document``: ``page_content`` is the title followed by the snippet, and
    the metadata carries the result URL (``source``) and ``title``.
    """

    # Key sent in the ``x-api-key`` header; an empty value falls back to SCAVIO_KEY.
    api_key: str = ''
    # Number of results requested per query.
    num_results: int = 5
    # Seconds to wait for the API. requests waits indefinitely unless a
    # timeout is passed, which would stall the whole chain.
    timeout: float = 10.0

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.api_key = self.api_key or SCAVIO_KEY

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Search for *query* and return the hits as Documents.

        Raises:
            requests.HTTPError: the API answered with an error status.
            requests.Timeout: no response arrived within ``timeout`` seconds.
        """
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers={'x-api-key': self.api_key, 'Content-Type': 'application/json'},
            json={'query': query, 'country_code': 'us', 'num_results': self.num_results},
            timeout=self.timeout,
        )
        resp.raise_for_status()
        docs = []
        for r in resp.json().get('organic_results', []):
            # A result lacking a title or link should not abort the whole
            # retrieval, so missing fields degrade to empty strings.
            title = r.get('title', '')
            docs.append(Document(
                page_content=f"{title}\n{r.get('snippet', '')}",
                metadata={'source': r.get('link', ''), 'title': title},
            ))
        return docs
retriever = ScavioRetriever(num_results=5)
docs = retriever.invoke('LangChain RAG tutorial 2026')
for d in docs:
print(f'{d.metadata["title"][:50]}\n {d.metadata["source"]}')
步骤 2: 替换链中的 TavilySearchResults
在基于代理的 RAG 中,将 TavilySearchResults 替换为 Scavio 工具。工具包装器保持相同的接口,因此您的代理配置保持不变。
from langchain_core.tools import tool
@tool
def web_search(query: str) -> str:
    """Search the web for current information. Returns relevant results with titles, snippets, and URLs."""
    # NOTE: the docstring above is the tool description, so it is kept
    # verbatim; implementation notes live in comments instead.
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 5},
        # requests waits indefinitely unless a timeout is passed.
        timeout=10,
    )
    # Surface HTTP errors instead of turning an error response into an empty
    # answer that looks like "no results found".
    resp.raise_for_status()
    results = resp.json().get('organic_results', [])
    # One "Title / Content / Source" paragraph per result, separated by a
    # blank line; missing fields degrade to empty strings.
    return '\n\n'.join(
        f'Title: {r.get("title", "")}\nContent: {r.get("snippet", "")}\nSource: {r.get("link", "")}'
        for r in results
    )
# BEFORE (Tavily-backed tool list):
# from langchain_community.tools.tavily_search import TavilySearchResults
# tools = [TavilySearchResults(max_results=5)]
# AFTER (the web_search tool takes the same place in the tool list):
tools = [web_search]
# Call the tool directly as a smoke test; it returns a single formatted string.
result = web_search.invoke('LangChain vs LlamaIndex 2026')
print(result[:300])
步骤 3: 更新 RetrievalQA 链
如果您在 RetrievalQA 中使用 Tavily 作为检索器,请将其替换为 ScavioRetriever。链的接口保持不变。
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
# BEFORE (Tavily retriever):
# from langchain_community.retrievers import TavilySearchAPIRetriever
# retriever = TavilySearchAPIRetriever(k=5)
# AFTER (Scavio retriever requesting the same number of results):
retriever = ScavioRetriever(num_results=5)
# The rest of your chain stays exactly the same
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)
# Source documents are requested so the sources can be listed after the answer.
qa_chain = RetrievalQA.from_chain_type(
llm=llm,
chain_type='stuff',
retriever=retriever,
return_source_documents=True
)
# Run a query
result = qa_chain.invoke({'query': 'What are the best RAG frameworks in 2026?'})
# Print only the first 200 characters of the answer.
print(f'Answer: {result["result"][:200]}')
print(f'\nSources:')
for doc in result['source_documents'][:3]:
print(f' - {doc.metadata["source"]}')
步骤 4: 添加多平台检索以实现更丰富的 RAG
将 Reddit 和 YouTube 资源添加到您的 RAG 管道中,超越 Tavily 提供的功能。这样,除了网络结果之外,LLM 还能获得社区观点和视频参考。
class MultiPlatformRetriever(BaseRetriever):
    """LangChain retriever that runs one Scavio search per configured platform.

    Platforms listed in the internal site map (``reddit``, ``youtube``) are
    searched by prefixing the query with a ``site:`` filter; any other
    platform name sends the query unchanged. Every hit is tagged with the
    platform it came from, both in ``page_content`` and in the metadata.
    """

    # Key sent in the ``x-api-key`` header; an empty value falls back to SCAVIO_KEY.
    api_key: str = ''
    # Platforms to query, in order; one API call is made per entry.
    platforms: list = ['google', 'reddit']
    # Number of results requested from each platform.
    results_per_platform: int = 3
    # Seconds to wait for each API call. requests waits indefinitely unless a
    # timeout is passed.
    timeout: float = 10.0

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.api_key = self.api_key or SCAVIO_KEY

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Search every platform for *query* and return all hits as Documents.

        Raises:
            requests.HTTPError: any platform search answered with an error status.
            requests.Timeout: a search got no response within ``timeout`` seconds.
        """
        docs = []
        site_map = {'reddit': 'reddit.com', 'youtube': 'youtube.com'}
        for platform in self.platforms:
            site = site_map.get(platform)
            q = f'site:{site} {query}' if site else query
            resp = requests.post(
                'https://api.scavio.dev/api/v1/search',
                headers={'x-api-key': self.api_key, 'Content-Type': 'application/json'},
                json={'query': q, 'country_code': 'us', 'num_results': self.results_per_platform},
                timeout=self.timeout,
            )
            # Surface HTTP errors rather than treating an error response as
            # "this platform had no results".
            resp.raise_for_status()
            for r in resp.json().get('organic_results', []):
                # Missing fields degrade to empty strings so one malformed
                # result does not abort the whole retrieval.
                title = r.get('title', '')
                docs.append(Document(
                    page_content=f'[{platform.upper()}] {title}\n{r.get("snippet", "")}',
                    metadata={'source': r.get('link', ''), 'title': title, 'platform': platform},
                ))
        return docs
# Query three platforms; the retriever turns 'reddit' and 'youtube' into site: filters.
retriever = MultiPlatformRetriever(platforms=['google', 'reddit', 'youtube'])
docs = retriever.invoke('best python web framework 2026')
# List each hit with the platform it came from and the first 50 characters of its title.
for d in docs:
print(f'[{d.metadata["platform"]}] {d.metadata["title"][:50]}')
print(f'\nCost: {len(set(d.metadata["platform"] for d in docs))} API calls = ${len(set(d.metadata["platform"] for d in docs)) * 0.005:.3f}')
Python 示例
import os, requests
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from typing import List
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
class ScavioRetriever(BaseRetriever):
    """Compact LangChain retriever over the Scavio search endpoint.

    Each entry in the response's ``organic_results`` list becomes one
    ``Document`` with the title and snippet as ``page_content`` and the
    result URL under the ``source`` metadata key.
    """

    # Key sent in the ``x-api-key`` header; an empty value falls back to SCAVIO_KEY.
    api_key: str = ''
    # Number of results requested per query.
    num_results: int = 5

    def __init__(self, **kw):
        super().__init__(**kw)
        self.api_key = self.api_key or SCAVIO_KEY

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Search for *query* and return the hits as Documents.

        Raises:
            requests.HTTPError: the API answered with an error status.
            requests.Timeout: no response arrived within 10 seconds.
        """
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers={'x-api-key': self.api_key, 'Content-Type': 'application/json'},
            json={'query': query, 'country_code': 'us', 'num_results': self.num_results},
            # requests waits indefinitely unless a timeout is passed.
            timeout=10,
        )
        # Surface HTTP errors instead of returning an empty result list.
        resp.raise_for_status()
        return [
            Document(
                page_content=f"{r.get('title', '')}\n{r.get('snippet', '')}",
                metadata={'source': r.get('link', '')},
            )
            for r in resp.json().get('organic_results', [])
        ]
retriever = ScavioRetriever(num_results=5)
docs = retriever.invoke('RAG frameworks 2026')
for d in docs:
print(f"{d.page_content[:60]}\n {d.metadata['source']}")
JavaScript 示例
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
/**
 * Query the Scavio search endpoint and map its organic results to
 * `{ pageContent, metadata }` objects.
 *
 * @param {string} query - Search query text.
 * @param {number} [num=5] - Number of results to request.
 * @returns {Promise<Array<{pageContent: string, metadata: {source: string, title: string}}>>}
 *   One entry per organic result; an empty array when the response has none.
 * @throws {Error} If the API responds with a non-2xx status.
 */
async function searchForRAG(query, num = 5) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: num })
  });
  // fetch() only rejects on network failure; without this check an HTTP error
  // response would be parsed and silently reported as "no results".
  if (!resp.ok) {
    throw new Error(`Scavio search failed: ${resp.status} ${resp.statusText}`);
  }
  const data = await resp.json();
  return (data.organic_results || []).map(r => ({
    pageContent: `${r.title}\n${r.snippet || ''}`,
    metadata: { source: r.link, title: r.title }
  }));
}
searchForRAG('RAG frameworks 2026').then(docs => {
docs.forEach(d => console.log(`${d.pageContent.slice(0, 60)}\n ${d.metadata.source}`));
});
预期输出
LangChain RAG Tutorial: Complete Guide 2026
https://example.com/langchain-rag-tutorial
Building Production RAG with LangChain
https://docs.langchain.com/rag-guide
[google] Best RAG Frameworks Comparison 2026
[reddit] r/LangChain - RAG framework recommendations 2026
[youtube] RAG Tutorial: LangChain vs LlamaIndex
Cost: 3 API calls = $0.015