AI models regularly fabricate brand details: wrong pricing, invented features, confused product lines, and outdated information. Before publishing AI-generated content about brands, you need a validation layer that checks claims against live data. This tutorial builds an automated brand mention validator that searches Google and Amazon to verify or flag AI-generated claims at $0.005 per check.
Prerequisites
- Python 3.9+ installed
- requests library installed
- A Scavio API key from scavio.dev
- AI-generated content to validate
Walkthrough
Step 1: Extract brand claims from AI-generated text
Parse AI-generated content to find brand names, pricing claims, feature claims, and comparison statements that need verification.
# Shared setup for the walkthrough: stdlib modules plus the requests HTTP client.
import os, re, requests
# API key is read from the environment; raises KeyError at import time if unset.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Headers sent with every Scavio API request.
H = {'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'}
def extract_claims(text: str) -> list:
    """Extract verifiable brand claims from AI-generated text.

    Scans *text* for three claim types and returns them in this order:
    pricing, feature, comparison. Each claim is a dict with keys
    'type', 'brand', and 'claim'.
    """
    claims = []
    # Pricing claims: "costs $X", "$X/month", "starts at $X"
    for m in re.finditer(r'(\w[\w\s]+?)\s+(?:costs?|priced? at|starts? at|for)\s+(\$[\d,.]+(?:/\w+)?)', text):
        claims.append({'type': 'pricing', 'brand': m.group(1).strip(), 'claim': m.group(2)})
    # Feature claims: "X offers Y", "X includes Y", "X supports Y".
    # The terminator accepts end-of-string as well as punctuation, so a
    # feature claim at the very end of the text is no longer dropped.
    # (`.` does not match newlines here, so a mid-text line that ends
    # without punctuation is still missed.)
    for m in re.finditer(r'(\w[\w\s]+?)\s+(?:offers?|includes?|supports?|provides?|features?)\s+(.+?)(?:[\.!,]|$)', text):
        claims.append({'type': 'feature', 'brand': m.group(1).strip(), 'claim': m.group(2).strip()})
    # Comparison claims: "X is better than Y", "X outperforms Y"
    for m in re.finditer(r'(\w+)\s+(?:is better than|outperforms|beats|surpasses)\s+(\w+)', text):
        claims.append({'type': 'comparison', 'brand': m.group(1), 'claim': f'better than {m.group(2)}'})
    return claims
# Example AI-generated text
ai_text = """Notion costs $10/month for the Pro plan and offers real-time collaboration.
Obsidian starts at $50/year for commercial use and supports plugin extensions.
Notion is better than Obsidian for team collaboration."""
claims = extract_claims(ai_text)
for c in claims:
    print(f'[{c["type"]}] {c["brand"]}: {c["claim"]}')

Step 2: Verify claims against live search data
Search for each claim and check if the search results corroborate or contradict it. Pricing claims get special handling since exact numbers matter.
import time
def verify_claim(claim: dict) -> dict:
    """Verify a single extracted claim against live search results.

    Expects a dict with 'type' ('pricing' | 'feature' | 'comparison'),
    'brand', and 'claim' keys, as produced by extract_claims. Returns
    the claim dict augmented with 'verified' (bool), 'status'
    ('VERIFIED' | 'UNVERIFIED' | 'CHECK MANUALLY'), and 'evidence'
    (first 200 chars of the concatenated search-result text).

    Comparison claims (and pricing claims with no parsable price) always
    fall through to 'CHECK MANUALLY' — "better than" is subjective.
    """
    brand = claim['brand']
    claim_text = claim['claim']
    # Build verification query
    if claim['type'] == 'pricing':
        query = f'{brand} pricing plans 2026'
    elif claim['type'] == 'feature':
        query = f'{brand} {claim_text}'
    else:
        query = f'{brand} vs {claim_text.replace("better than ", "")}'
    # timeout added: requests has no default timeout, so a stalled server
    # would otherwise hang the whole validation run indefinitely.
    resp = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
                         json={'query': query, 'country_code': 'us', 'num_results': 5},
                         timeout=30)
    results = resp.json().get('organic_results', [])
    all_text = ' '.join(f"{r.get('title','')} {r.get('snippet','')}" for r in results).lower()
    # Check verification
    if claim['type'] == 'pricing':
        # Exact numbers matter: look for the quoted amount, with or without '$'.
        price_val = re.search(r'\$([\d,.]+)', claim_text)
        if price_val:
            found = price_val.group(1) in all_text or price_val.group(0) in all_text
            return {**claim, 'verified': found,
                    'status': 'VERIFIED' if found else 'UNVERIFIED',
                    'evidence': all_text[:200]}
    elif claim['type'] == 'feature':
        # Verified when >50% of the claim's significant words (len > 3)
        # appear in the result titles/snippets.
        key_terms = [w for w in claim_text.lower().split() if len(w) > 3]
        matches = sum(1 for t in key_terms if t in all_text)
        coverage = matches / len(key_terms) if key_terms else 0
        return {**claim, 'verified': coverage > 0.5,
                'status': 'VERIFIED' if coverage > 0.5 else 'UNVERIFIED',
                'evidence': all_text[:200]}
    # Fallback: comparison claims and pricing claims with no parsable price.
    return {**claim, 'verified': False, 'status': 'CHECK MANUALLY', 'evidence': all_text[:200]}
for claim in claims:
    result = verify_claim(claim)
    print(f'[{result["status"]}] {result["brand"]}: {result["claim"]}')
    time.sleep(0.3)

Step 3: Build the validation report
Generate a report showing which claims are verified, unverified, or need manual review. Flag content with too many unverified claims.
def validate_content(text: str) -> dict:
    """Validate every brand claim found in *text* against live search data.

    Returns {'status': 'NO_CLAIMS', ...} when nothing verifiable is found;
    otherwise prints a summary and returns a report dict with the overall
    verdict, per-status counts, accuracy, per-claim results, and API cost.
    """
    found = extract_claims(text)
    if not found:
        return {'status': 'NO_CLAIMS', 'message': 'No verifiable claims found'}
    checked = []
    for item in found:
        checked.append(verify_claim(item))
        time.sleep(0.3)  # brief pause between paid API calls
    statuses = [r['status'] for r in checked]
    verified = statuses.count('VERIFIED')
    unverified = statuses.count('UNVERIFIED')
    manual = statuses.count('CHECK MANUALLY')
    total = len(checked)
    accuracy = verified / total if total else 0
    # Verdict thresholds: >= 80% pass, >= 50% review, otherwise fail.
    if accuracy >= 0.8:
        overall = 'PASS'
    elif accuracy >= 0.5:
        overall = 'REVIEW'
    else:
        overall = 'FAIL'
    report = {
        'overall': overall,
        'accuracy': accuracy,
        'verified': verified,
        'unverified': unverified,
        'manual_check': manual,
        'total_claims': total,
        'results': checked,
        'cost': total * 0.005,
    }
    print(f'Content Validation: {overall}')
    print(f'Claims: {verified} verified, {unverified} unverified, {manual} manual check')
    print(f'Accuracy: {accuracy:.0%}')
    print(f'Cost: ${report["cost"]:.3f}')
    for r in checked:
        icon = {'VERIFIED': 'v', 'UNVERIFIED': 'x'}.get(r['status'], '?')
        print(f' [{icon}] {r["brand"]}: {r["claim"]}')
    return report
validate_content(ai_text)

Step 4: Integrate into a content publishing pipeline
Add the validator as a pre-publish check. Content with too many unverified claims gets flagged for human review before going live.
def pre_publish_check(content: str, min_accuracy: float = 0.7) -> dict:
    """Gate AI-generated *content* before it goes live.

    Publishes when no claims were found or when verified-claim accuracy
    meets *min_accuracy*; otherwise holds the content for human review.
    Returns an action dict ('PUBLISH' or 'HOLD') with supporting detail.
    """
    report = validate_content(content)
    if report.get('status') == 'NO_CLAIMS':
        return {'action': 'PUBLISH', 'reason': 'No brand claims to verify'}
    accuracy = report['accuracy']
    # Anything not fully verified is surfaced either as a warning (publish)
    # or as the list of claims blocking publication (hold).
    flagged = [r for r in report['results'] if r['status'] != 'VERIFIED']
    if accuracy < min_accuracy:
        return {
            'action': 'HOLD',
            'reason': f'{accuracy:.0%} accuracy below {min_accuracy:.0%} threshold',
            'unverified_claims': flagged,
            'suggestion': 'Review and correct unverified claims before publishing'
        }
    return {
        'action': 'PUBLISH',
        'reason': f'{accuracy:.0%} accuracy meets threshold',
        'warnings': flagged
    }
# Test the pipeline
result = pre_publish_check(ai_text)
print(f'\nAction: {result["action"]}')
print(f'Reason: {result["reason"]}')
if result.get('warnings'):
    print('Warnings:')
    for w in result['warnings']:
        print(f' - {w["brand"]}: {w["claim"]} ({w["status"]})')

Python Example
# Standalone example setup: same environment-provided key and headers
# as the walkthrough, in a single copy-pasteable snippet.
import os, re, requests, time
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
H = {'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'}
def verify_brand_claim(brand, claim):
    """Search for '<brand> <claim>' and report whether search results corroborate it.

    A claim counts as verified when more than half of its significant
    words (length > 3) appear in the top result snippets. Returns a dict
    with 'brand', 'claim', and 'verified' (bool).
    """
    # timeout added: requests never times out by default, so a stalled
    # server would hang this call forever.
    resp = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
                         json={'query': f'{brand} {claim}', 'country_code': 'us', 'num_results': 5},
                         timeout=30)
    text = ' '.join(r.get('snippet','') for r in resp.json().get('organic_results', [])).lower()
    terms = [w for w in claim.lower().split() if len(w) > 3]
    matches = sum(1 for t in terms if t in text)
    verified = matches / len(terms) > 0.5 if terms else False
    return {'brand': brand, 'claim': claim, 'verified': verified}
claims = [('Notion', 'real-time collaboration'), ('Obsidian', 'plugin extensions')]
for brand, claim in claims:
    r = verify_brand_claim(brand, claim)
    print(f"{'VERIFIED' if r['verified'] else 'UNVERIFIED'}: {r['brand']} - {r['claim']}")
    time.sleep(0.3)

JavaScript Example
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
// Verify one brand claim against live search data and log the verdict.
// A claim is VERIFIED when more than half of its significant words
// (length > 3) appear in the returned result snippets.
async function verifyBrandClaim(brand, claim) {
  const response = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query: `${brand} ${claim}`, country_code: 'us', num_results: 5 })
  });
  const payload = await response.json();
  const snippets = (payload.organic_results || []).map(r => r.snippet || '');
  const haystack = snippets.join(' ').toLowerCase();
  const keywords = claim.toLowerCase().split(' ').filter(w => w.length > 3);
  const hits = keywords.filter(k => haystack.includes(k)).length;
  const verified = keywords.length > 0 && hits / keywords.length > 0.5;
  console.log(`${verified ? 'VERIFIED' : 'UNVERIFIED'}: ${brand} - ${claim}`);
}
verifyBrandClaim('Notion', 'real-time collaboration');

Expected Output
Content Validation: REVIEW
Claims: 2 verified, 1 unverified, 0 manual check
Accuracy: 67%
Cost: $0.015
[v] Notion: real-time collaboration
[x] Obsidian: $50/year (pricing may have changed)
[v] Notion: better than Obsidian for team collaboration
Action: HOLD
Reason: 67% accuracy below 70% threshold