SearXNG 是一个开源元搜索引擎,聚合来自 Google、Bing、DuckDuckGo 等引擎的结果。它适合个人使用,但在生产负载下很容易失效,因为上游引擎会主动封禁 IP。本教程在轮换代理之后部署 SearXNG,为其添加健康检查,并在 SearXNG 返回空结果时自动回退到 Scavio。这样大多数查询可以免费完成,只有在触发回退时才需为每次查询支付 0.005 美元。
前置条件
- 安装了 Docker 和 Docker Compose
- 轮换代理服务(例如 BrightData、Oxylabs)
- 已安装 Python 3.9+
- 用于后备的 Scavio API 密钥
操作指南
步骤 1: 使用 Docker Compose 部署 SearXNG
使用 Redis 设置 SearXNG 以进行速率限制,并设置用于配置引擎和输出格式的自定义设置文件。
# docker-compose.yml
# services:
#   searxng:
#     image: searxng/searxng:latest
#     ports:
#       - '8888:8080'
#     volumes:
#       - ./searxng:/etc/searxng
#     environment:
#       - SEARXNG_BASE_URL=http://localhost:8888
#   redis:
#     image: redis:alpine
#     ports:
#       - '6379:6379'
# searxng/settings.yml (key sections):
# server:
#   limiter: true
#   secret_key: "change-me-to-random-string"
# search:
#   formats:
#     - html
#     - json    # required for /search?format=json
# outgoing:
#   proxies:
#     all://:
#       - socks5h://user:pass@proxy1:1080
#       - socks5h://user:pass@proxy2:1080
# engines:
#   - name: google
#     shortcut: g
#     disabled: false
#   - name: bing
#     shortcut: b
#     disabled: false
import subprocess
# Start the Compose stack in the background and capture its output.
result = subprocess.run(['docker', 'compose', 'up', '-d'], capture_output=True, text=True)
if result.returncode != 0:
    # Without this check a failed start falls through to the success
    # message below whenever stdout is empty.
    raise SystemExit(
        f'docker compose up failed (exit {result.returncode}): {result.stderr.strip()}'
    )
print(result.stdout or 'SearXNG started')
print('Access: http://localhost:8888')
步骤 2: 构建带有健康检查的 SearXNG 客户端
创建一个客户端来查询 SearXNG JSON API,检测故障(空结果、超时、被拦截的响应),并统计最近 20 次查询的成功率。
import requests, time
from collections import deque
class SearXNGClient:
    """Query a SearXNG instance's JSON API and track recent query health.

    Every call to :meth:`search` records exactly one boolean in a rolling
    window: ``True`` when at least one result came back, ``False`` for an
    empty result set, a non-200 status, a request failure, or a body that
    is not valid JSON.
    """

    def __init__(self, base_url='http://localhost:8888'):
        """Remember the instance URL and start with an empty health window.

        Args:
            base_url: Root URL of the SearXNG instance.
        """
        self.base_url = base_url
        self.health_window = deque(maxlen=20)  # outcomes of the last 20 queries

    def search(self, query: str, count: int = 10) -> list:
        """Search SearXNG and return up to *count* normalised results.

        Args:
            query: Search terms.
            count: Maximum number of results to return; applied locally by
                slicing the response.

        Returns:
            A list of ``{'title', 'link', 'snippet'}`` dicts, or ``[]`` when
            the request fails, the status is not 200, or nothing was found.
        """
        try:
            resp = requests.get(
                # rstrip avoids '//search' when base_url ends with a slash.
                f"{self.base_url.rstrip('/')}/search",
                params={'q': query, 'format': 'json', 'categories': 'general'},
                timeout=15,
            )
            if resp.status_code != 200:
                self.health_window.append(False)
                return []
            results = resp.json().get('results', [])
        except (requests.exceptions.RequestException, ValueError):
            # ValueError covers a non-JSON body (e.g. an HTML block page) on
            # requests releases whose JSON decode error is not a
            # RequestException subclass.
            self.health_window.append(False)
            return []

        self.health_window.append(len(results) > 0)
        return [
            {
                'title': r.get('title', ''),
                'link': r.get('url', ''),
                'snippet': r.get('content', ''),
            }
            for r in results[:count]
        ]

    @property
    def health_pct(self) -> float:
        """Percentage of successful queries in the window (100.0 when empty)."""
        if not self.health_window:
            return 100.0
        return sum(self.health_window) / len(self.health_window) * 100
# Demo: create a client with the default base URL and run one query
# (this performs a real HTTP request to the SearXNG instance).
searxng = SearXNGClient()
results = searxng.search('python tutorial')
print(f'Results: {len(results)}, Health: {searxng.health_pct:.0f}%')
步骤 3: 添加具有自动故障转移功能的 Scavio 后备功能
当 SearXNG 运行状况降至 50% 以下或未返回结果时,自动路由至 Scavio。记录回退事件以进行监控。
import os
# Scavio API key, read from the environment; the subscript lookup raises
# KeyError if SCAVIO_API_KEY is not set.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Minimum number of seconds between re-tries of SearXNG while it is unhealthy.
_SEARXNG_PROBE_INTERVAL_S = 60.0
_last_searxng_probe = None  # time.monotonic() of the last probe, or None


def _searxng_probe_due() -> bool:
    """Return True, and start a new cooldown, when an unhealthy SearXNG may be re-tried."""
    global _last_searxng_probe
    now = time.monotonic()
    if (_last_searxng_probe is not None
            and now - _last_searxng_probe < _SEARXNG_PROBE_INTERVAL_S):
        return False
    _last_searxng_probe = now
    return True


def search_with_fallback(query: str, count: int = 10) -> dict:
    """Search via SearXNG, falling back to Scavio when it is unhealthy or empty.

    While SearXNG health is below 50% queries go straight to Scavio, except
    for one probe query per cooldown interval. Without that probe the health
    window would never receive new samples and SearXNG would stay bypassed
    forever.

    Args:
        query: Search terms.
        count: Maximum number of results to request.

    Returns:
        ``{'results': [...], 'provider': 'searxng' | 'scavio', 'cost': ...}``
        where ``cost`` is 0 for SearXNG and 0.005 for Scavio.
    """
    # Skip SearXNG if health is poor and no probe is due yet.
    if searxng.health_pct < 50 and not _searxng_probe_due():
        print(f'SearXNG health {searxng.health_pct:.0f}% -- routing to Scavio')
        return {'results': scavio_search(query, count), 'provider': 'scavio', 'cost': 0.005}

    results = searxng.search(query, count)
    if results:
        return {'results': results, 'provider': 'searxng', 'cost': 0}

    # Fallback
    print('SearXNG returned 0 results -- falling back to Scavio')
    return {'results': scavio_search(query, count), 'provider': 'scavio', 'cost': 0.005}
def scavio_search(query: str, count: int = 10) -> list:
    """Run *query* against the Scavio search API and normalise the results.

    Args:
        query: Search terms.
        count: Number of results requested via ``num_results``.

    Returns:
        A list of ``{'title', 'link', 'snippet'}`` dicts built from the
        response's ``organic_results``; missing fields become ``''``.

    Raises:
        requests.exceptions.HTTPError: On a 4xx/5xx response.
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': count},
        timeout=15,  # requests waits indefinitely unless a timeout is given
    )
    resp.raise_for_status()
    return [
        {
            'title': r.get('title', ''),
            'link': r.get('link', ''),
            'snippet': r.get('snippet', ''),
        }
        for r in resp.json().get('organic_results', [])
    ]
# Demo: send one query through the SearXNG -> Scavio fallback chain.
result = search_with_fallback('best search api 2026')
print(f'Provider: {result["provider"]}, Results: {len(result["results"])}, Cost: ${result["cost"]}')
Python 示例
import requests, os
from collections import deque
# Scavio API key; the subscript lookup raises KeyError if SCAVIO_API_KEY is unset.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Rolling record of the last 20 SearXNG query outcomes (True = at least one result).
health = deque(maxlen=20)
def searxng(query, count=10):
    """Query the local SearXNG JSON API and record the outcome in ``health``.

    Exactly one sample is appended to ``health`` per call: ``True`` when at
    least one result came back, ``False`` otherwise.

    Args:
        query: Search terms.
        count: Maximum number of results to return.

    Returns:
        A list of ``{'title', 'link', 'snippet'}`` dicts, or ``[]`` on a
        request failure, a non-200 status, a non-JSON body, or no results.
    """
    try:
        resp = requests.get(
            'http://localhost:8888/search',
            params={'q': query, 'format': 'json'},
            timeout=15,
        )
        if resp.status_code != 200:
            health.append(False)
            return []
        results = resp.json().get('results', [])[:count]
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers a non-JSON body on requests releases whose JSON
        # decode error is not a RequestException subclass.
        health.append(False)
        return []

    health.append(len(results) > 0)
    # .get() keeps a result with a missing field from raising after the
    # health sample has already been recorded.
    return [
        {'title': r.get('title', ''), 'link': r.get('url', ''), 'snippet': r.get('content', '')}
        for r in results
    ]
def search(query, count=10):
    """Search via SearXNG when it is healthy, otherwise (or if empty) via Scavio.

    SearXNG is used while at least 50% of the recorded queries succeeded.
    Below that it is bypassed, except for one probe query every 60 seconds;
    without the probe ``health`` would never receive new samples and SearXNG
    would stay bypassed forever.

    Args:
        query: Search terms.
        count: Maximum number of results to request.

    Returns:
        A list of ``{'title', 'link', 'snippet'}`` dicts.

    Raises:
        requests.exceptions.HTTPError: If Scavio answers with a 4xx/5xx status.
        requests.exceptions.RequestException: If the Scavio request fails or times out.
    """
    import time  # local import: this standalone example does not import time at the top

    health_pct = sum(health) / len(health) * 100 if health else 100
    use_searxng = health_pct >= 50
    if not use_searxng:
        now = time.monotonic()
        last_probe = getattr(search, '_last_probe', None)
        if last_probe is None or now - last_probe >= 60:
            search._last_probe = now
            use_searxng = True

    if use_searxng:
        results = searxng(query, count)
        if results:
            return results

    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': count},
        timeout=15,  # requests waits indefinitely unless a timeout is given
    )
    # Surface auth/quota errors instead of turning them into an empty list.
    resp.raise_for_status()
    return [
        {'title': r.get('title', ''), 'link': r.get('link', ''), 'snippet': r.get('snippet', '')}
        for r in resp.json().get('organic_results', [])
    ]
for r in search('best python frameworks 2026'):
    print(r['title'])
JavaScript 示例
// Scavio API key from the environment (undefined if SCAVIO_API_KEY is unset).
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
// Recent SearXNG query outcomes (true = at least one result); searxng() appends to it.
const health = [];
/**
 * Query the local SearXNG JSON API and record the outcome in `health`.
 *
 * Exactly one sample is pushed per call and the window is trimmed to 20
 * entries on every path, including failures.
 *
 * @param {string} query - Search terms.
 * @param {number} [count=10] - Maximum number of results to return.
 * @returns {Promise<Array<{title: string, link: string, snippet: string}>>}
 *   Normalised results, or [] on a network error, timeout, non-2xx status,
 *   non-JSON body, or no results.
 */
async function searxng(query, count = 10) {
  let hits = [];
  try {
    const resp = await fetch(
      `http://localhost:8888/search?q=${encodeURIComponent(query)}&format=json`,
      { signal: AbortSignal.timeout(15000) } // abort a hung request after 15 s
    );
    if (resp.ok) {
      const results = (await resp.json()).results?.slice(0, count) || [];
      hits = results.map(r => ({ title: r.title ?? '', link: r.url ?? '', snippet: r.content ?? '' }));
    }
  } catch {
    // Network error, timeout, or non-JSON body: counted as a failed query below.
  }
  health.push(hits.length > 0);
  if (health.length > 20) health.shift();
  return hits;
}
// Minimum time between re-tries of SearXNG while its health is below 50%.
const SEARXNG_PROBE_INTERVAL_MS = 60000;
// Date.now() of the last probe; 0 means no probe has been sent yet.
let lastSearxngProbe = 0;

/**
 * Search via SearXNG when it is healthy, otherwise (or if empty) via Scavio.
 *
 * Below 50% health SearXNG is bypassed except for one probe query per
 * interval; without the probe `health` would never receive new samples and
 * SearXNG would stay bypassed forever.
 *
 * @param {string} query - Search terms.
 * @param {number} [count=10] - Maximum number of results to request.
 * @returns {Promise<Array<{title: string, link: string, snippet: string}>>}
 * @throws {Error} If the Scavio request returns a non-2xx status.
 */
async function search(query, count = 10) {
  const pct = health.length ? health.filter(Boolean).length / health.length * 100 : 100;
  let useSearxng = pct >= 50;
  if (!useSearxng && Date.now() - lastSearxngProbe >= SEARXNG_PROBE_INTERVAL_MS) {
    lastSearxngProbe = Date.now();
    useSearxng = true;
  }
  if (useSearxng) {
    const hits = await searxng(query, count);
    if (hits.length) return hits;
  }
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: count }),
    signal: AbortSignal.timeout(15000) // abort a hung request after 15 s
  });
  // Surface auth/quota errors instead of turning them into an empty list.
  if (!resp.ok) throw new Error(`Scavio request failed: HTTP ${resp.status}`);
  return (await resp.json()).organic_results?.map(r => ({ title: r.title, link: r.link, snippet: r.snippet })) || [];
}
search('best python frameworks 2026').then(r => r.forEach(x => console.log(x.title)));
预期输出
SearXNG returned 0 results -- falling back to Scavio
Provider: scavio, Results: 10, Cost: $0.005
SearXNG health: 45% (9/20 successful)
Fallback rate: 55% of queries routed to Scavio
Estimated monthly cost at 1000 queries: $2.75