Not all search results are equally trustworthy. A .gov domain citing primary data is more reliable than a content-farm blog post. This tutorial builds a trust scoring pipeline that evaluates each search result on source authority, content freshness, and cross-reference consistency. The scores help AI agents prioritize reliable sources and flag questionable ones. Cost: $0.005 per search, plus optional verification queries.
Prerequisites
- Python 3.9+ installed
- requests library installed
- A Scavio API key from scavio.dev
Walkthrough
Step 1: Define source authority tiers
Classify domains into authority tiers based on their TLD and known reputation. This provides a baseline trust signal.
# Authority tiers: exact hostnames and bare TLDs mapped to baseline trust
# scores. Tiers are checked in declaration order, so a site listed in
# tier1 wins even when its TLD also appears in a lower tier (e.g.
# docs.python.org is tier2 despite the 'org' TLD being tier3).
AUTHORITY_TIERS = {
    'tier1': {
        'domains': {'gov', 'edu', 'mil'},
        'known_sites': {'reuters.com', 'apnews.com', 'nature.com', 'science.org',
                        'arxiv.org', 'nih.gov', 'cdc.gov', 'who.int'},
        'score': 90
    },
    'tier2': {
        'domains': set(),
        'known_sites': {'nytimes.com', 'bbc.com', 'washingtonpost.com',
                        'github.com', 'stackoverflow.com', 'docs.python.org',
                        'developer.mozilla.org', 'microsoft.com'},
        'score': 75
    },
    'tier3': {
        'domains': {'org', 'io'},
        'known_sites': {'medium.com', 'dev.to', 'hackernoon.com', 'reddit.com'},
        'score': 50
    },
}

def get_authority_score(url: str) -> int:
    """Return a 0-100 authority score for *url* based on its domain.

    The hostname is matched against each tier's known-site list and TLD
    set, tier1 first; unknown domains get a baseline of 30.
    """
    # Guard the split: the original `url.split('/')[2]` raised IndexError
    # for scheme-less URLs such as 'nih.gov/study'. Hostnames are
    # case-insensitive, so normalize to lowercase.
    parts = url.split('/')
    domain = parts[2].lower() if len(parts) > 2 else ''
    # Treat 'www.github.com' the same as 'github.com'.
    if domain.startswith('www.'):
        domain = domain[4:]
    tld = domain.split('.')[-1]
    for tier in AUTHORITY_TIERS.values():
        if domain in tier['known_sites'] or tld in tier['domains']:
            return tier['score']
    return 30  # unknown-domain baseline
# Quick sanity check: one tier1 site, one tier2 site, and an unknown
# domain that should fall back to the baseline score of 30.
test_urls = ['https://nih.gov/study', 'https://github.com/repo',
             'https://randomsite.xyz/blog']
for url in test_urls:
    print(f' {url}: authority={get_authority_score(url)}')

Step 2: Add freshness scoring
Score results based on how recently the content was published or updated. Extract dates from snippets and URLs.
import re
from datetime import datetime
def get_freshness_score(snippet: str, url: str, current_year: int = 2026) -> int:
    """Score content freshness from 0-100 based on dates detected in the
    snippet text or URL.

    Args:
        snippet: Result snippet text to scan for dates.
        url: Result URL, also scanned (years often appear in slugs).
        current_year: Reference year for the age calculation. Defaults to
            2026 for backward compatibility with the original behavior.

    Returns:
        100 for current/future-dated content, 70 at one year old, 40 at
        two, 10 for older, 80 for a month-name date with no bare year,
        and 20 when no date information is found.
    """
    text = snippet + ' ' + url
    # Match any 21st-century year. The original pattern 20(2[4-9]) silently
    # ignored years outside 2024-2029, so a 2021 article scored 20 ("no
    # date") instead of 10 (stale), and 2030+ content will be invisible.
    # Word boundaries avoid matching inside longer numbers.
    years = [int(y) for y in re.findall(r'\b20\d{2}\b', text)]
    if years:
        age = current_year - max(years)
        # Future-dated or current-year content is maximally fresh. The
        # original fell through to the stale branch (10) for age < 0.
        if age <= 0:
            return 100
        if age == 1:
            return 70
        if age == 2:
            return 40
        return 10
    # Fallback: a month-name + year reference (e.g. "May 2026") with no
    # bare-year match above.
    months = re.findall(
        r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s+20\d{2}', text)
    if months:
        return 80
    return 20  # no date information found
# Demo: a snippet with a current date, one with a stale year, and one
# with no date information at all.
test_snippets = [
    ('Updated May 2026 - Best CRM tools', 'https://site.com/crm-2026'),
    ('A comprehensive guide from 2024', 'https://site.com/old-guide'),
    ('Learn Python programming basics', 'https://site.com/python'),
]
for snippet, url in test_snippets:
    print(f' freshness={get_freshness_score(snippet, url):3d}: {snippet[:50]}')

Step 3: Build the composite trust scoring pipeline
Combine authority, freshness, and cross-reference consistency into a single trust score for each search result.
import requests, os

# Read the API key from the environment; raises KeyError at import time
# if SCAVIO_API_KEY is not configured (fail fast rather than mid-search).
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
def trust_score_results(query: str) -> list:
    """Search via the Scavio API and return results ranked by trust.

    Each returned dict carries the composite 'trust_score' (0-100) plus
    its three components: 'authority', 'freshness', and 'consistency'.
    Results are sorted by descending trust score.

    Raises:
        requests.HTTPError: if the search API returns a non-2xx status.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': 10},
        timeout=30,  # never hang indefinitely on a stalled connection
    )
    resp.raise_for_status()  # surface HTTP errors instead of parsing an error body
    results = resp.json().get('organic_results', [])
    # Pre-extract each snippet's keyword set once; the pairwise loop below
    # would otherwise re-run the regex O(n^2) times.
    keyword_sets = [
        set(re.findall(r'\b\w{5,}\b', r.get('snippet', '').lower()))
        for r in results
    ]
    scored = []
    for i, r in enumerate(results):
        link = r.get('link', '')
        authority = get_authority_score(link)
        freshness = get_freshness_score(r.get('snippet', ''), link)
        # Cross-reference consistency: count how many OTHER results share
        # more than 3 substantial (5+ letter) keywords with this one.
        cross_ref = sum(
            1
            for j, other_words in enumerate(keyword_sets)
            if j != i and len(keyword_sets[i] & other_words) > 3
        )
        consistency = min(cross_ref * 20, 100)
        # Weighted composite: authority dominates; freshness and
        # cross-source agreement share the remainder.
        trust = round(authority * 0.4 + freshness * 0.3 + consistency * 0.3)
        scored.append({
            'title': r.get('title', '')[:50], 'url': link,
            'trust_score': trust, 'authority': authority,
            'freshness': freshness, 'consistency': consistency
        })
    scored.sort(key=lambda x: -x['trust_score'])
    return scored
# Run the pipeline and print the top five results with their component
# scores aligned under a fixed-width header.
results = trust_score_results('best CRM software 2026')
print(f'{"Score":>5} {"Auth":>5} {"Fresh":>5} {"Cross":>5} Title')
print('-' * 70)
for r in results[:5]:
    print(f'{r["trust_score"]:>5} {r["authority"]:>5} {r["freshness"]:>5} '
          f'{r["consistency"]:>5} {r["title"]}')

Python Example
import requests, os, re
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
KNOWN = {'gov': 90, 'edu': 90, 'github.com': 75, 'stackoverflow.com': 75}
def trust_score(query):
resp = requests.post('https://api.scavio.dev/api/v1/search',
headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
json={'query': query, 'country_code': 'us', 'num_results': 10})
for r in resp.json().get('organic_results', []):
domain = r['link'].split('/')[2] if '/' in r['link'] else ''
tld = domain.split('.')[-1]
auth = KNOWN.get(domain, KNOWN.get(tld, 30))
fresh = 100 if '2026' in r.get('snippet', '') else 40
score = int(auth * 0.5 + fresh * 0.5)
print(f'[{score:3d}] {r["title"][:50]}')
trust_score('python best practices 2026')JavaScript Example
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
const KNOWN = { gov: 90, edu: 90, 'github.com': 75, 'stackoverflow.com': 75 };
async function trustScore(query) {
const resp = await fetch('https://api.scavio.dev/api/v1/search', {
method: 'POST',
headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
body: JSON.stringify({ query, country_code: 'us', num_results: 10 })
});
for (const r of (await resp.json()).organic_results || []) {
const domain = new URL(r.link).hostname;
const tld = domain.split('.').pop();
const auth = KNOWN[domain] || KNOWN[tld] || 30;
const fresh = (r.snippet || '').includes('2026') ? 100 : 40;
console.log(`[${Math.round(auth*0.5+fresh*0.5)}] ${r.title.slice(0, 50)}`);
}
}
trustScore('python best practices 2026');Expected Output
Score Auth Fresh Cross Title
----------------------------------------------------------------------
   78    90   100    40 NIH Guidelines on Data Analysis 2026
   78    75   100    60 GitHub - python-best-practices: Updated May
   69    75    70    60 Stack Overflow: Python 3.14 New Features
   54    30   100    40 Best Python Practices 2026 - TechBlog
   36    30    40    40 Python Tips and Tricks - randomsite.com