Les modèles d'IA inventent régulièrement des détails sur les marques : prix erronés, fonctionnalités inventées, gammes de produits confuses et informations obsolètes. Avant de publier du contenu généré par IA sur les marques, vous avez besoin d'une couche de validation qui vérifie les affirmations par rapport aux données en direct. Ce tutoriel construit un validateur automatisé de mentions de marques qui recherche sur Google et Amazon pour vérifier ou signaler les affirmations générées par IA à 0,005 $ par vérification.
Prérequis
- Python 3.9+ installé
- bibliothèque requests installée
- Une clé API Scavio provenant de scavio.dev
- Contenu généré par IA à valider
Parcours
Étape 1: Extraire les affirmations sur les marques d'un texte généré par IA
Analyser le contenu généré par IA pour trouver les noms de marques, les affirmations de prix, les affirmations de fonctionnalités et les déclarations de comparaison qui nécessitent une vérification.
import os, re, requests
# API key for the search service, read from the SCAVIO_API_KEY environment
# variable. A missing variable raises KeyError as soon as this line runs.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Headers sent with every search request: the API key plus a JSON content type.
H = {'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'}
# Coordinating conjunctions that glue a second verb or subject onto a sentence.
_CONJUNCTIONS = frozenset({'and', 'but', 'or'})

# Every verb phrase that introduces a claim; used to find a sentence's subject.
_CLAIM_VERBS = (
    r'costs?|priced? at|starts? at|for'
    r'|offers?|includes?|supports?|provides?|features?'
    r'|is better than|outperforms|beats|surpasses'
)

# Pricing claims: "costs $X", "$X/month", "starts at $X".
# The amount is matched as a well-formed number so that sentence punctuation
# after the price ("costs $8.") is not captured as part of the claim.
_PRICE_RE = re.compile(
    r'(\w[\w\s]+?)\s+(?:costs?|priced? at|starts? at|for)\s+'
    r'(\$(?:\d+(?:,\d{3})*(?:\.\d+)?|\.\d+)(?:/\w+)?)'
)

# Feature claims: "X offers Y", "X includes Y", "X supports Y".
# The claim ends at punctuation followed by whitespace, or at end of line, so
# "1,000" and "Node.js" are not cut in half and a final sentence without a
# trailing period is still picked up.
_FEATURE_RE = re.compile(
    r'(\w[\w\s]+?)\s+(?:offers?|includes?|supports?|provides?|features?)\s+'
    r'(.+?)(?:[.!,](?=\s|$)|$)',
    re.MULTILINE,
)

# Comparison claims: "X is better than Y", "X outperforms Y".
_COMPARISON_RE = re.compile(
    r'(\w+)\s+(?:is better than|outperforms|beats|surpasses)\s+(\w+)'
)


def _sentence_subject(text: str, pos: int) -> str:
    """Return the words that open the sentence containing *pos*, up to its first claim verb."""
    start = 0
    for boundary in re.finditer(r'[.!?]\s+', text[:pos]):
        start = boundary.end()
    head = re.match(r'(\w[\w\s]*?)\s+(?:' + _CLAIM_VERBS + r')\b', text[start:])
    return head.group(1) if head else ''


def _strip_conjunctions(words: list) -> str:
    """Join *words* after dropping leading/trailing conjunctions (never the last word)."""
    while len(words) > 1 and words[0].lower() in _CONJUNCTIONS:
        words = words[1:]
    while len(words) > 1 and words[-1].lower() in _CONJUNCTIONS:
        words = words[:-1]
    return ' '.join(words)


def _resolve_brand(text: str, match: 're.Match') -> str:
    """Turn the raw subject captured by a claim regex into a brand name."""
    words = match.group(1).split()
    # A subject ending in "and"/"but"/"or" means the verb belongs to a compound
    # predicate ("Notion costs $10/month ... and offers X"): the regex could
    # only capture the words after the price, so use the sentence's subject.
    if words[-1].lower() in _CONJUNCTIONS:
        subject = _sentence_subject(text, match.start())
        if subject:
            return _strip_conjunctions(subject.split())
    return _strip_conjunctions(words)


def extract_claims(text: str) -> list:
    """Extract verifiable claims from AI-generated text.

    Three kinds of claims are recognised with regular expressions:
    pricing ("X costs $10/month"), feature ("X supports plugins") and
    comparison ("X is better than Y").

    Args:
        text: The AI-generated text to scan.

    Returns:
        A list of dicts with the keys ``type`` ("pricing", "feature" or
        "comparison"), ``brand`` and ``claim``. All pricing claims come
        first, then feature claims, then comparison claims. An empty list
        is returned when nothing matches.
    """
    claims = []
    for m in _PRICE_RE.finditer(text):
        claims.append({'type': 'pricing', 'brand': _resolve_brand(text, m), 'claim': m.group(2)})
    for m in _FEATURE_RE.finditer(text):
        claims.append({'type': 'feature', 'brand': _resolve_brand(text, m), 'claim': m.group(2).strip()})
    for m in _COMPARISON_RE.finditer(text):
        claims.append({'type': 'comparison', 'brand': _resolve_brand(text, m), 'claim': f'better than {m.group(2)}'})
    return claims
# Example AI-generated text
ai_text = """Notion costs $10/month for the Pro plan and offers real-time collaboration.
Obsidian starts at $50/year for commercial use and supports plugin extensions.
Notion is better than Obsidian for team collaboration."""
claims = extract_claims(ai_text)
for c in claims:
print(f'[{c["type"]}] {c["brand"]}: {c["claim"]}')Étape 2: Vérifier les affirmations par rapport aux données de recherche en direct
Rechercher chaque affirmation et vérifier si les résultats de recherche la corroborent ou la contredisent. Les affirmations de prix bénéficient d'un traitement spécial car les chiffres exacts sont importants.
import time
def _price_mentioned(amount: str, text: str) -> bool:
    """Return True when *amount* appears in *text* as a complete number.

    A plain substring test would accept "10" inside "100", "110" or "10.99".
    The match must therefore not be preceded by a digit or separator, and must
    not be followed by further digits. Trailing zero decimals ("10.00") are
    accepted as the same amount.
    """
    pattern = r'(?<![\d.,])' + re.escape(amount) + r'(?:\.0+)?(?![\d]|[.,]\d)'
    return re.search(pattern, text) is not None


def verify_claim(claim: dict) -> dict:
    """Verify a single claim against live search results.

    Args:
        claim: A dict with the keys ``type`` ("pricing", "feature" or
            anything else, treated as a comparison), ``brand`` and ``claim``.

    Returns:
        A copy of *claim* extended with ``verified`` (bool), ``status``
        ("VERIFIED", "UNVERIFIED" or "CHECK MANUALLY") and ``evidence``
        (the first 200 characters of the lower-cased result text). When the
        search request fails, the status is "CHECK MANUALLY", ``evidence``
        is empty and an extra ``error`` key holds the failure message.
    """
    brand = claim['brand']
    claim_text = claim['claim']
    kind = claim['type']

    # Build verification query
    if kind == 'pricing':
        # NOTE(review): the year is hard-coded; update it or derive it from
        # the current date so the query does not go stale.
        query = f'{brand} pricing plans 2026'
    elif kind == 'feature':
        query = f'{brand} {claim_text}'
    else:
        query = f'{brand} vs {claim_text.replace("better than ", "")}'

    try:
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers=H,
            json={'query': query, 'country_code': 'us', 'num_results': 5},
            timeout=10,  # without a timeout a stalled connection blocks forever
        )
        resp.raise_for_status()
        results = resp.json().get('organic_results') or []
    except (requests.RequestException, ValueError) as exc:
        # A failed lookup says nothing about the claim itself, so it must not
        # be reported as UNVERIFIED; hand it to a human instead.
        return {**claim, 'verified': False, 'status': 'CHECK MANUALLY',
                'evidence': '', 'error': str(exc)}

    all_text = ' '.join(
        f"{r.get('title') or ''} {r.get('snippet') or ''}" for r in results
    ).lower()

    # Check verification
    if kind == 'pricing':
        price_val = re.search(r'\$([\d,.]+)', claim_text)
        # Drop sentence punctuation that the character class may have picked up.
        amount = price_val.group(1).rstrip('.,') if price_val else ''
        if amount:
            found = _price_mentioned(amount, all_text)
            return {**claim, 'verified': found,
                    'status': 'VERIFIED' if found else 'UNVERIFIED',
                    'evidence': all_text[:200]}
    elif kind == 'feature':
        # Verified when more than half of the longer words of the claim
        # (over three characters) occur somewhere in the result text.
        key_terms = [w for w in claim_text.lower().split() if len(w) > 3]
        matches = sum(1 for t in key_terms if t in all_text)
        coverage = matches / len(key_terms) if key_terms else 0
        return {**claim, 'verified': coverage > 0.5,
                'status': 'VERIFIED' if coverage > 0.5 else 'UNVERIFIED',
                'evidence': all_text[:200]}

    # Comparisons, and pricing claims without a parsable amount, cannot be
    # checked automatically.
    return {**claim, 'verified': False, 'status': 'CHECK MANUALLY', 'evidence': all_text[:200]}
for claim in claims:
result = verify_claim(claim)
print(f'[{result["status"]}] {result["brand"]}: {result["claim"]}')
time.sleep(0.3)Étape 3: Construire le rapport de validation
Générer un rapport indiquant quelles affirmations sont vérifiées, non vérifiées ou nécessitent un examen manuel. Signaler le contenu avec trop d'affirmations non vérifiées.
def validate_content(text: str) -> dict:
    """Extract, verify and summarise every claim found in *text*.

    Each claim is checked with ``verify_claim`` with a 0.3 s pause after every
    check. A summary is printed and the full report is returned.

    Returns:
        ``{'status': 'NO_CLAIMS', 'message': ...}`` when no claim is found.
        Otherwise a dict with ``overall`` ("PASS" from 80 % verified,
        "REVIEW" from 50 %, else "FAIL"), ``accuracy`` (verified / total),
        the per-status counts, ``total_claims``, ``results`` and ``cost``
        (0.005 per claim).
    """
    found = extract_claims(text)
    if not found:
        return {'status': 'NO_CLAIMS', 'message': 'No verifiable claims found'}

    checked = []
    for item in found:
        checked.append(verify_claim(item))
        time.sleep(0.3)

    # Tally the outcomes by status label.
    statuses = [entry['status'] for entry in checked]
    verified = statuses.count('VERIFIED')
    unverified = statuses.count('UNVERIFIED')
    manual = statuses.count('CHECK MANUALLY')
    total = len(checked)
    accuracy = verified / total if total else 0

    if accuracy < 0.5:
        overall = 'FAIL'
    elif accuracy < 0.8:
        overall = 'REVIEW'
    else:
        overall = 'PASS'

    report = {
        'overall': overall,
        'accuracy': accuracy,
        'verified': verified,
        'unverified': unverified,
        'manual_check': manual,
        'total_claims': total,
        'results': checked,
        'cost': total * 0.005,
    }

    print(f'Content Validation: {overall}')
    print(f'Claims: {verified} verified, {unverified} unverified, {manual} manual check')
    print(f'Accuracy: {accuracy:.0%}')
    print(f'Cost: ${report["cost"]:.3f}')
    # One line per claim: v = verified, x = unverified, ? = anything else.
    markers = {'VERIFIED': 'v', 'UNVERIFIED': 'x'}
    for entry in checked:
        marker = markers.get(entry['status'], '?')
        print(f' [{marker}] {entry["brand"]}: {entry["claim"]}')
    return report
validate_content(ai_text)
Étape 4: Intégrer dans un pipeline de publication de contenu
Ajouter le validateur comme vérification pré-publication. Le contenu avec trop d'affirmations non vérifiées est signalé pour révision humaine avant d'être mis en ligne.
def pre_publish_check(content: str, min_accuracy: float = 0.7) -> dict:
    """Decide whether AI-generated content may be published.

    Args:
        content: The text to validate.
        min_accuracy: Minimum share of verified claims required to publish.

    Returns:
        A dict whose ``action`` is "PUBLISH" or "HOLD" and whose ``reason``
        explains the decision. A "PUBLISH" result for content that had claims
        carries the non-verified claims under ``warnings``. A "HOLD" result
        carries them under ``unverified_claims`` together with a
        ``suggestion``.
    """
    report = validate_content(content)
    if report.get('status') == 'NO_CLAIMS':
        return {'action': 'PUBLISH', 'reason': 'No brand claims to verify'}

    score = report['accuracy']
    # Everything that did not come back as VERIFIED, including manual checks.
    not_verified = [entry for entry in report['results'] if entry['status'] != 'VERIFIED']

    if score >= min_accuracy:
        decision = {
            'action': 'PUBLISH',
            'reason': f'{score:.0%} accuracy meets threshold',
            'warnings': not_verified,
        }
    else:
        decision = {
            'action': 'HOLD',
            'reason': f'{score:.0%} accuracy below {min_accuracy:.0%} threshold',
            'unverified_claims': not_verified,
            'suggestion': 'Review and correct unverified claims before publishing',
        }
    return decision
# Test the pipeline
result = pre_publish_check(ai_text)
print(f'\nAction: {result["action"]}')
print(f'Reason: {result["reason"]}')
if result.get('warnings'):
print('Warnings:')
for w in result['warnings']:
print(f' - {w["brand"]}: {w["claim"]} ({w["status"]})')Exemple Python
import os, re, requests, time
# API key for the search service, read from the SCAVIO_API_KEY environment
# variable. A missing variable raises KeyError as soon as this line runs.
SCAVIO_KEY = os.environ['SCAVIO_API_KEY']
# Headers sent with every search request: the API key plus a JSON content type.
H = {'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'}
def verify_brand_claim(brand, claim):
    """Check whether search results support *claim* about *brand*.

    The query "<brand> <claim>" is sent to the search API and the snippets of
    the results are scanned for the claim's longer words (over three
    characters). The claim counts as verified when more than half of those
    words occur in the snippets.

    Args:
        brand: Brand name used in the search query.
        claim: Claim text to look for.

    Returns:
        ``{'brand': brand, 'claim': claim, 'verified': bool}``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers=H,
        json={'query': f'{brand} {claim}', 'country_code': 'us', 'num_results': 5},
        timeout=10,  # without a timeout a stalled connection blocks forever
    )
    # Tolerate a null result list or snippet, as the JavaScript version does.
    results = resp.json().get('organic_results') or []
    text = ' '.join(r.get('snippet') or '' for r in results).lower()
    terms = [w for w in claim.lower().split() if len(w) > 3]
    matches = sum(1 for t in terms if t in text)
    verified = matches / len(terms) > 0.5 if terms else False
    return {'brand': brand, 'claim': claim, 'verified': verified}
claims = [('Notion', 'real-time collaboration'), ('Obsidian', 'plugin extensions')]
for brand, claim in claims:
r = verify_brand_claim(brand, claim)
print(f"{'VERIFIED' if r['verified'] else 'UNVERIFIED'}: {r['brand']} - {r['claim']}")
time.sleep(0.3)Exemple JavaScript
// API key for the search service, taken from the environment.
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;

/**
 * Search for "<brand> <claim>" and log whether the result snippets support
 * the claim. The claim counts as verified when more than half of its words
 * longer than three characters appear in the snippets.
 *
 * @param {string} brand - Brand name used in the search query.
 * @param {string} claim - Claim text to look for.
 * @returns {Promise<void>} Resolves once the verdict has been logged.
 */
async function verifyBrandClaim(brand, claim) {
  const payload = { query: `${brand} ${claim}`, country_code: 'us', num_results: 5 };
  const response = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify(payload)
  });
  const data = await response.json();

  // All snippets joined into one lower-cased haystack.
  const haystack = (data.organic_results || [])
    .map((hit) => hit.snippet || '')
    .join(' ')
    .toLowerCase();

  const keywords = claim.toLowerCase().split(' ').filter((word) => word.length > 3);
  let hits = 0;
  for (const keyword of keywords) {
    if (haystack.includes(keyword)) {
      hits += 1;
    }
  }

  const verified = keywords.length > 0 && hits / keywords.length > 0.5;
  console.log(`${verified ? 'VERIFIED' : 'UNVERIFIED'}: ${brand} - ${claim}`);
}
verifyBrandClaim('Notion', 'real-time collaboration');
Sortie attendue
Content Validation: REVIEW
Claims: 2 verified, 1 unverified, 0 manual check
Accuracy: 67%
Cost: $0.015
[v] Notion: real-time collaboration
[x] Obsidian: $50/year (pricing may have changed)
[v] Notion: better than Obsidian for team collaboration
Action: HOLD
Reason: 67% accuracy below 70% threshold