并非所有搜索结果都同样值得信赖。引用原始数据的 .gov 域比内容农场博客文章更可靠。本教程构建了一个信任评分管道,用于评估每个搜索结果的来源权威性、内容新鲜度和交叉引用一致性。这些分数可以帮助人工智能代理优先考虑可靠的来源并标记有问题的来源。成本:每次搜索 0.005 美元,加上可选的验证查询。
前置条件
- 已安装 Python 3.9+
- 已安装 requests 库
- 来自 scavio.dev 的 Scavio API 密钥
操作指南
步骤 1: 定义来源权威层级
根据域名的 TLD 和已知声誉将域名划分为权威级别。这提供了基线信任信号。
Python
from urllib.parse import urlparse

# Authority tiers, checked in declaration order (tier1 first). A host matches
# a tier when it is one of the tier's known sites (or a subdomain of one) or
# when its top-level domain is listed under 'domains'.
AUTHORITY_TIERS = {
    'tier1': {
        'domains': {'gov', 'edu', 'mil'},
        'known_sites': {'reuters.com', 'apnews.com', 'nature.com', 'science.org',
                        'arxiv.org', 'nih.gov', 'cdc.gov', 'who.int'},
        'score': 90
    },
    'tier2': {
        'domains': set(),
        'known_sites': {'nytimes.com', 'bbc.com', 'washingtonpost.com',
                        'github.com', 'stackoverflow.com', 'docs.python.org',
                        'developer.mozilla.org', 'microsoft.com'},
        'score': 75
    },
    'tier3': {
        'domains': {'org', 'io'},
        'known_sites': {'medium.com', 'dev.to', 'hackernoon.com', 'reddit.com'},
        'score': 50
    },
}


def get_authority_score(url: str) -> int:
    """Return the authority score of the site that serves *url*.

    The host name is extracted with ``urlparse`` so that ports, upper-case
    letters, missing schemes and subdomains (``www.nytimes.com``) no longer
    defeat the lookup or raise ``IndexError``.

    Args:
        url: Absolute URL, or a scheme-less ``host/path`` string.

    Returns:
        The ``score`` of the first tier in ``AUTHORITY_TIERS`` that matches
        the host, or 30 when the host is unknown or cannot be parsed.
    """
    try:
        # A leading '//' makes urlparse treat scheme-less input as host + path.
        parsed = urlparse(url if '://' in url else f'//{url}')
        host = (parsed.hostname or '').rstrip('.')
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. unbalanced IPv6 brackets).
        return 30
    if not host:
        return 30

    tld = host.rpartition('.')[2]
    for tier in AUTHORITY_TIERS.values():
        is_known_site = any(
            host == site or host.endswith('.' + site)
            for site in tier['known_sites']
        )
        if is_known_site or tld in tier['domains']:
            return tier['score']
    return 30  # unknown domain baseline
test_urls = ['https://nih.gov/study', 'https://github.com/repo',
'https://randomsite.xyz/blog']
for url in test_urls:
print(f' {url}: authority={get_authority_score(url)}')
步骤 2: 添加新鲜度评分
根据内容最近发布或更新的时间对结果进行评分。从片段和 URL 中提取日期。
Python
import re
from datetime import datetime
def get_freshness_score(snippet: str, url: str,
                        current_year: 'int | None' = None) -> int:
    """Score freshness from 0-100 based on the most recent year in the text.

    Args:
        snippet: Result snippet text.
        url: Result URL; years embedded in the path count as well.
        current_year: Year to measure age against. Defaults to the current
            calendar year instead of a hard-coded constant.

    Returns:
        100 for the current year, 70 for last year, 40 for two years ago,
        10 for anything older, and 20 when no usable year is found.
    """
    if current_year is None:
        current_year = datetime.now().year
    text = f"{snippet or ''} {url or ''}"

    # Only 2024-2029 are recognised as years. A month-name check is not
    # needed: any "May 2026"-style date already contains a year matched here.
    found_years = {int('20' + suffix) for suffix in re.findall(r'20(2[4-9])', text)}

    # Years in the future ("forecast through 2029") say nothing about when the
    # page was written, so they must not displace a real publication year.
    usable_years = [year for year in found_years if year <= current_year]
    if not usable_years:
        return 20  # no date information found

    age = current_year - max(usable_years)
    return {0: 100, 1: 70, 2: 40}.get(age, 10)
test_snippets = [
('Updated May 2026 - Best CRM tools', 'https://site.com/crm-2026'),
('A comprehensive guide from 2024', 'https://site.com/old-guide'),
('Learn Python programming basics', 'https://site.com/python'),
]
for snippet, url in test_snippets:
print(f' freshness={get_freshness_score(snippet, url):3d}: {snippet[:50]}')
步骤 3: 构建综合信任评分管道
将权威性、新鲜度和交叉引用一致性结合到每个搜索结果的单个信任评分中。
Python
import requests, os
# API key is read from the SCAVIO_API_KEY environment variable; a missing
# variable raises KeyError as soon as this line runs.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
def trust_score_results(query: str) -> list:
    """Search for *query* and return the results ranked by a trust score.

    Each result is scored on authority (40%), freshness (30%) and
    cross-reference consistency (30%). Consistency counts how many other
    results share more than three 5+-letter words with this result's
    snippet, at 20 points per match, capped at 100.

    Args:
        query: Search query sent to the Scavio search endpoint.

    Returns:
        A list of dicts with keys ``title``, ``url``, ``trust_score``,
        ``authority``, ``freshness`` and ``consistency``, sorted by
        ``trust_score`` in descending order.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx response.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 10},
        timeout=30,  # requests waits indefinitely unless a timeout is given
    )
    # Surface auth/quota errors instead of silently scoring an empty list.
    resp.raise_for_status()
    results = resp.json().get('organic_results') or []

    # Tokenise every snippet once; the pairwise comparison below reuses the sets.
    word_pattern = re.compile(r'\b\w{5,}\b')
    keyword_sets = [
        set(word_pattern.findall((r.get('snippet') or '').lower()))
        for r in results
    ]

    scored = []
    for i, r in enumerate(results):
        # Missing or null fields degrade to empty strings rather than raising.
        link = r.get('link') or ''
        snippet = r.get('snippet') or ''
        authority = get_authority_score(link)
        freshness = get_freshness_score(snippet, link)

        # Cross-reference: do other results mention similar facts?
        cross_ref = sum(
            1
            for j, other_words in enumerate(keyword_sets)
            if j != i and len(keyword_sets[i] & other_words) > 3
        )
        consistency = min(cross_ref * 20, 100)

        # Weighted composite
        trust = round(authority * 0.4 + freshness * 0.3 + consistency * 0.3)
        scored.append({
            'title': (r.get('title') or '')[:50], 'url': link,
            'trust_score': trust, 'authority': authority,
            'freshness': freshness, 'consistency': consistency
        })

    scored.sort(key=lambda x: -x['trust_score'])
    return scored
results = trust_score_results('best CRM software 2026')
print(f'{"Score":>5} {"Auth":>5} {"Fresh":>5} {"Cross":>5} Title')
print('-' * 70)
for r in results[:5]:
print(f'{r["trust_score"]:>5} {r["authority"]:>5} {r["freshness"]:>5} '
f'{r["consistency"]:>5} {r["title"]}')
Python 示例
Python
import requests, os, re
# API key is read from the SCAVIO_API_KEY environment variable; a missing
# variable raises KeyError as soon as this line runs.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Authority scores keyed by either a full host name or a bare TLD; hosts that
# match neither key are scored 30 by trust_score.
KNOWN = {'gov': 90, 'edu': 90, 'github.com': 75, 'stackoverflow.com': 75}
def trust_score(query):
    """Search for *query* and print a quick trust score for each result.

    The score is the mean of an authority value (looked up in ``KNOWN`` by
    host name, then by TLD, defaulting to 30) and a freshness value (100 when
    the snippet mentions '2026', otherwise 40).

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx response.
    """
    from urllib.parse import urlparse

    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 10},
        timeout=30,  # requests waits indefinitely unless a timeout is given
    )
    resp.raise_for_status()
    for r in resp.json().get('organic_results') or []:
        # urlparse copes with ports, upper-case hosts and scheme-less links,
        # where url.split('/')[2] mis-parses or raises IndexError.
        try:
            domain = urlparse(r.get('link') or '').hostname or ''
        except ValueError:
            domain = ''
        tld = domain.rpartition('.')[2]
        auth = KNOWN.get(domain, KNOWN.get(tld, 30))
        fresh = 100 if '2026' in (r.get('snippet') or '') else 40
        # round() rather than int() so x.5 averages are not truncated; this
        # matches the rounding used by the JavaScript version of this example.
        score = round(auth * 0.5 + fresh * 0.5)
        print(f'[{score:3d}] {(r.get("title") or "")[:50]}')
trust_score('python best practices 2026')
JavaScript 示例
JavaScript
// API key is read from the SCAVIO_API_KEY environment variable (undefined when it is not set).
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
// Authority scores keyed by either a full host name or a bare TLD; hosts that
// match neither key are scored 30 by trustScore.
const KNOWN = { gov: 90, edu: 90, 'github.com': 75, 'stackoverflow.com': 75 };
/**
 * Search for `query` and log a quick trust score for each result.
 *
 * The score is the rounded mean of an authority value (looked up in KNOWN by
 * host name, then by TLD, defaulting to 30) and a freshness value (100 when
 * the snippet mentions '2026', otherwise 40).
 *
 * @param {string} query Search query sent to the Scavio search endpoint.
 * @returns {Promise<void>}
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function trustScore(query) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us', num_results: 10 })
  });
  // fetch only rejects on network failure; HTTP errors must be checked explicitly.
  if (!resp.ok) {
    throw new Error(`Scavio search failed: HTTP ${resp.status}`);
  }
  const data = await resp.json();
  for (const r of data.organic_results || []) {
    // new URL() throws on a missing or malformed link; score such results as
    // unknown instead of aborting the whole loop.
    let domain = '';
    try {
      domain = new URL(r.link).hostname;
    } catch {
      domain = '';
    }
    const tld = domain.split('.').pop();
    const auth = KNOWN[domain] || KNOWN[tld] || 30;
    const fresh = (r.snippet || '').includes('2026') ? 100 : 40;
    const title = (r.title || '').slice(0, 50);
    console.log(`[${Math.round(auth * 0.5 + fresh * 0.5)}] ${title}`);
  }
}
trustScore('python best practices 2026');
预期输出
JSON
Score Auth Fresh Cross Title
----------------------------------------------------------------------
78 90 100 40 NIH Guidelines on Data Analysis 2026
78 75 100 60 GitHub - python-best-practices: Updated May
69 75 70 60 Stack Overflow: Python 3.14 New Features
54 30 100 40 Best Python Practices 2026 - TechBlog
36 30 40 40 Python Tips and Tricks - randomsite.com