概述
LLM 的回答需要可靠的数据来源作为锚定。此流水线将 YaCy 去中心化搜索引擎与 LLM 集成,通过搜索获取实时数据来锚定 LLM 的回答,减少幻觉并提供可验证的信息来源。
触发器
每次 LLM 查询
计划
事件驱动
工作流步骤
1
解析 LLM 查询
分析 LLM 收到的查询,提取需要搜索验证的关键信息。
2
执行 YaCy 搜索
通过 YaCy 搜索引擎获取相关的去中心化搜索结果。
3
结果评估和筛选
评估搜索结果的相关性和可信度,筛选高质量来源。
4
注入上下文
将筛选后的搜索数据作为上下文注入 LLM 提示。
5
生成锚定回答
LLM 基于搜索数据生成有据可查的回答,附带来源引用。
Python 实现
Python
import json
import logging
import os

import requests
# Scavio API key; it is mandatory, so a missing variable fails fast with KeyError.
API_KEY = os.environ["SCAVIO_API_KEY"]
# Headers attached to every Scavio request.
H = {
    "x-api-key": API_KEY,
    "Content-Type": "application/json",
}
# Base URL of the YaCy peer; defaults to a local instance on port 8090.
YACY_URL = os.getenv("YACY_URL", "http://localhost:8090")
def yacy_search(query: str) -> list:
    """Search the YaCy P2P index for *query*.

    Sends a GET request to ``{YACY_URL}/yacysearch.json`` asking for at most
    10 records and converts every item of the first channel into a dict with
    the keys ``title``, ``url`` and ``snippet`` (missing fields become ``""``).

    The lookup is best-effort: a network failure, a timeout, an HTTP error
    status, a non-JSON body or an unexpectedly shaped payload all produce an
    empty list, so the caller can fall back to another source.

    Args:
        query: The search terms to send to YaCy.

    Returns:
        A list of result dicts, possibly empty.
    """
    try:
        resp = requests.get(
            f"{YACY_URL}/yacysearch.json",
            params={"query": query, "maximumRecords": 10},
            timeout=5,
        )
        # An error page must not be mistaken for a (possibly empty) result set.
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding errors on older requests versions.
        logging.getLogger(__name__).warning("YaCy search failed: %s", exc)
        return []

    # Validate the payload shape explicitly instead of relying on a
    # catch-all exception handler.
    if not isinstance(payload, dict):
        return []
    channels = payload.get("channels", [{}])
    if not isinstance(channels, list) or not channels:
        return []
    first_channel = channels[0]
    if not isinstance(first_channel, dict):
        return []
    items = first_channel.get("items", [])
    if not isinstance(items, list):
        return []

    return [
        {
            "title": item.get("title", ""),
            "url": item.get("link", ""),
            "snippet": item.get("description", ""),
        }
        for item in items
        if isinstance(item, dict)
    ]
def scavio_search(query: str) -> list:
    """Search the web through the Scavio API for *query*.

    POSTs the query (with ``country_code`` fixed to ``"us"``) to
    ``https://api.scavio.dev/api/v1/search`` using the module-level headers
    ``H`` and converts every entry of ``organic_results`` into a dict with
    the keys ``title``, ``url`` and ``snippet`` (missing fields become ``""``).

    Args:
        query: The search terms to send to Scavio.

    Returns:
        A list of result dicts; empty when the response carries no
        ``organic_results``.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
    """
    resp = requests.post(
        "https://api.scavio.dev/api/v1/search",
        headers=H,
        json={"query": query, "country_code": "us"},
        timeout=10,
    )
    # Surface auth/quota/server errors instead of reporting them as zero hits.
    resp.raise_for_status()
    data = resp.json()
    # `or []` also covers an explicit null value for "organic_results".
    return [
        {
            "title": hit.get("title", ""),
            "url": hit.get("link", ""),
            "snippet": hit.get("snippet", ""),
        }
        for hit in data.get("organic_results") or []
    ]
def grounding_pipeline(query: str) -> str:
    """Build a grounding context string for *query*.

    YaCy is queried first; when it yields fewer than three results, the
    Scavio results are appended. Results are de-duplicated by URL (first
    occurrence wins) and the first eight are rendered as
    ``[title](url): snippet`` entries separated by blank lines.
    """
    results = yacy_search(query)
    # Too few YaCy hits: top up with Scavio results.
    if len(results) < 3:
        results = results + scavio_search(query)

    # Keep only the first entry seen for each URL, preserving order.
    seen_urls = set()
    unique = []
    for entry in results:
        link = entry["url"]
        if link in seen_urls:
            continue
        seen_urls.add(link)
        unique.append(entry)

    # Render the leading entries as LLM context.
    rendered = [
        f"[{entry['title']}]({entry['url']}): {entry['snippet']}"
        for entry in unique[:8]
    ]
    return "\n\n".join(rendered)
# Example run: build the grounding context for a sample query.
context = grounding_pipeline("transformer architecture attention mechanism")
print(f"Grounding context ({len(context)} chars):\n{context[:500]}")
JavaScript 实现
JavaScript
// Headers attached to every Scavio request; the key is read from SCAVIO_API_KEY.
const H = {
  'x-api-key': process.env.SCAVIO_API_KEY,
  'Content-Type': 'application/json',
};
// Base URL of the YaCy peer; defaults to a local instance on port 8090.
const YACY_URL = process.env.YACY_URL || 'http://localhost:8090';
/**
 * Search the YaCy P2P index for `query`.
 *
 * Requests at most 10 records from `YACY_URL + '/yacysearch.json'` and maps
 * every item of the first channel to `{title, url, snippet}` (missing fields
 * become empty strings).
 *
 * Best-effort: a network failure, a timeout, a non-OK HTTP status, a non-JSON
 * body or an unexpectedly shaped payload all resolve to an empty array so the
 * caller can fall back to another source.
 *
 * @param {string} query Search terms.
 * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
 */
async function yacySearch(query) {
  try {
    const url = YACY_URL + '/yacysearch.json?query=' + encodeURIComponent(query) + '&maximumRecords=10';
    // Bound the wait at 5 s so an unresponsive peer cannot stall the pipeline.
    const res = await fetch(url, { signal: AbortSignal.timeout(5000) });
    // An error page must not be mistaken for a (possibly empty) result set.
    if (!res.ok) throw new Error('HTTP ' + res.status);
    const channels = (await res.json()).channels || [{}];
    const items = (channels[0] && channels[0].items) || [];
    return items.map((item) => ({
      title: item.title || '',
      url: item.link || '',
      snippet: item.description || '',
    }));
  } catch (err) {
    console.warn('YaCy search failed:', err && err.message);
    return [];
  }
}
/**
 * Search the web through the Scavio API for `query`.
 *
 * POSTs the query (with `country_code` fixed to 'us') using the shared
 * headers `H` and maps every entry of `organic_results` to
 * `{title, url, snippet}` (missing fields become empty strings).
 *
 * @param {string} query Search terms.
 * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
 * @throws {Error} On network failure, timeout (10 s) or a non-OK HTTP status.
 */
async function scavioSearch(query) {
  const res = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: H,
    body: JSON.stringify({ query, country_code: 'us' }),
    // Bound the wait at 10 s so a hung request cannot block forever.
    signal: AbortSignal.timeout(10000),
  });
  // Surface auth/quota/server errors instead of reporting them as zero hits.
  if (!res.ok) throw new Error('Scavio search failed: HTTP ' + res.status);
  const data = await res.json();
  return (data.organic_results || []).map((hit) => ({
    title: hit.title || '',
    url: hit.link || '',
    snippet: hit.snippet || '',
  }));
}
/**
 * Build a grounding context string for `query`.
 *
 * YaCy is queried first; when it yields fewer than three results, the Scavio
 * results are appended. Results are de-duplicated by URL (first occurrence
 * wins) and the first eight are rendered as `[title](url): snippet` entries
 * separated by blank lines.
 *
 * @param {string} query Search terms.
 * @returns {Promise<string>} The formatted context.
 */
async function groundingPipeline(query) {
  const yacyHits = await yacySearch(query);
  // Too few YaCy hits: top up with Scavio results.
  const hits = yacyHits.length < 3 ? yacyHits.concat(await scavioSearch(query)) : yacyHits;

  // Keep only the first entry seen for each URL, preserving order.
  const seenUrls = new Set();
  const unique = [];
  for (const hit of hits) {
    if (seenUrls.has(hit.url)) continue;
    seenUrls.add(hit.url);
    unique.push(hit);
  }

  const rendered = unique
    .slice(0, 8)
    .map((hit) => '[' + hit.title + '](' + hit.url + '): ' + hit.snippet);
  return rendered.join('\n\n');
}
// Example run: build the grounding context for a sample query.
const ctx = await groundingPipeline('transformer architecture attention mechanism');
console.log('Grounding context ('+ctx.length+' chars):\n'+ctx.slice(0,500));
使用的平台
包含知识图谱、PAA和AI概览的网页搜索