Tracking AI Overview citation changes lets you detect the moment your URLs gain or lose placement in Google's AI-generated answers. As AI Overviews reshape click distribution, knowing which domains appear in citations -- and when that list shifts -- is a direct signal for content strategy. The Scavio search API returns structured AI Overview data including cited URLs, so you can poll queries on a schedule, diff the citation lists, and trigger alerts when competitors enter or your pages drop. This tutorial builds a Python script that stores citation snapshots in JSON and reports changes between runs.
Prerequisites
- Python 3.8 or higher installed
- requests library installed (pip install requests)
- A Scavio API key from scavio.dev
- Basic familiarity with JSON file I/O in Python
Walkthrough
Step 1: Define queries and target URLs to monitor
Create a configuration dict mapping target queries to the URLs you care about. The script will check whether these URLs appear in AI Overview citations for each query.
# Step 1 config: each monitored query maps to the URLs we own for it.
# NOTE(review): "watch_competitors" is stored but never read by the monitor
# loop shown below — presumably a hook for future filtering; confirm.
MONITOR_CONFIG = {
    "best crm for startups": {
        "my_urls": ["https://mysite.com/crm-guide"],
        "watch_competitors": True
    },
    "how to automate lead scoring": {
        "my_urls": ["https://mysite.com/lead-scoring"],
        "watch_competitors": True
    }
}
Step 2: Fetch AI Overview citations from Scavio
POST to the Scavio search endpoint for each query. The response includes an ai_overview object with a citations array containing the URLs referenced in the AI-generated answer.
import requests
import os

# API key is read from the environment; the literal is only a placeholder.
API_KEY = os.environ.get('SCAVIO_API_KEY', 'your_scavio_api_key')

def fetch_citations(query: str) -> list[str]:
    """Return the URLs cited by Google's AI Overview for *query*.

    Queries with no AI Overview yield an empty list (both .get() calls
    fall back to empty defaults).
    """
    response = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY},
        json={'query': query, 'country_code': 'us'}
    )
    # Surface HTTP errors (bad key, rate limit, 5xx) immediately.
    response.raise_for_status()
    data = response.json()
    ai_overview = data.get('ai_overview', {})
    citations = ai_overview.get('citations', [])
    # Citations missing a 'url' field become '' rather than being dropped.
    return [c.get('url', '') for c in citations]
Step 3: Load previous snapshot and compute diff
Read the last saved snapshot from disk and compare it to the fresh citation list. Identify which URLs were added and which were removed since the last check.
import json
from pathlib import Path

# Previous run's citation lists, persisted between runs, keyed by query.
SNAPSHOT_FILE = Path('citation_snapshots.json')

def load_snapshot() -> dict:
    """Return the saved {query: [cited_urls]} snapshot, or {} on first run."""
    if SNAPSHOT_FILE.exists():
        return json.loads(SNAPSHOT_FILE.read_text())
    return {}

def diff_citations(old: list[str], new: list[str]) -> dict:
    """Bucket citation URLs into added / removed / unchanged between runs."""
    # Sets give O(1) membership tests and de-duplicate repeated URLs.
    old_set, new_set = set(old), set(new)
    return {
        'added': list(new_set - old_set),
        'removed': list(old_set - new_set),
        'unchanged': list(old_set & new_set)
    }
Step 4: Run the monitor loop and save updated snapshot
Iterate through all monitored queries, fetch current citations, diff against the previous snapshot, and print alerts for any changes. Save the new snapshot to disk for the next run.
from datetime import datetime

def run_monitor():
    """Fetch current citations per query, print diffs, save the new snapshot."""
    snapshot = load_snapshot()
    new_snapshot = {}
    for query, config in MONITOR_CONFIG.items():
        current = fetch_citations(query)
        new_snapshot[query] = current
        # First run: previous defaults to [], so every citation shows as ADDED.
        previous = snapshot.get(query, [])
        changes = diff_citations(previous, current)
        if changes['added'] or changes['removed']:
            print(f'[{datetime.now().isoformat()}] Changes for: {query}')
            for url in changes['added']:
                # Label whether the moving URL is ours or a competitor's.
                label = 'MY URL' if url in config['my_urls'] else 'COMPETITOR'
                print(f' + ADDED ({label}): {url}')
            for url in changes['removed']:
                label = 'MY URL' if url in config['my_urls'] else 'COMPETITOR'
                print(f' - REMOVED ({label}): {url}')
    # Persist the fresh citation lists for the next run's diff.
    SNAPSHOT_FILE.write_text(json.dumps(new_snapshot, indent=2))
    print(f'Snapshot saved with {len(new_snapshot)} queries')
Python Example
import os
import json
import requests
from pathlib import Path
from datetime import datetime

# API key is read from the environment; the literal is only a placeholder.
API_KEY = os.environ.get('SCAVIO_API_KEY', 'your_scavio_api_key')
ENDPOINT = 'https://api.scavio.dev/api/v1/search'
# Previous run's citation lists, persisted between runs, keyed by query.
SNAPSHOT_FILE = Path('citation_snapshots.json')

# Queries to monitor, each with the URLs we own for it.
# NOTE(review): 'watch_competitors' is stored but never read by run_monitor —
# presumably a hook for future filtering; confirm before relying on it.
MONITOR_CONFIG = {
    'best crm for startups': {
        'my_urls': ['https://mysite.com/crm-guide'],
        'watch_competitors': True
    },
    'how to automate lead scoring': {
        'my_urls': ['https://mysite.com/lead-scoring'],
        'watch_competitors': True
    }
}
def fetch_citations(query: str) -> list[str]:
    """Return the URLs cited by Google's AI Overview for *query*.

    Raises requests.HTTPError on a non-2xx response and requests.Timeout
    if the API does not answer within 30 seconds. Queries with no AI
    Overview yield an empty list.
    """
    response = requests.post(
        ENDPOINT,
        headers={'x-api-key': API_KEY},
        json={'query': query, 'country_code': 'us'},
        # fix: without a timeout a stalled request hangs the monitor forever
        timeout=30,
    )
    response.raise_for_status()
    data = response.json()
    # fix: defensive `or {}` — if the response contains "ai_overview": null,
    # .get('ai_overview', {}) returns None and the .get() below would crash.
    ai_overview = data.get('ai_overview') or {}
    # Citations missing a 'url' field become '' rather than being dropped.
    return [c.get('url', '') for c in ai_overview.get('citations', [])]
def load_snapshot() -> dict:
    """Load the previous {query: [cited_urls]} snapshot from disk.

    Returns {} on the first run, and also when the snapshot file is
    unreadable or corrupt (e.g. a previous run was interrupted mid-write) —
    a bad file then degrades to "treat everything as new" instead of
    crashing every subsequent run on json.loads.
    """
    if SNAPSHOT_FILE.exists():
        try:
            return json.loads(SNAPSHOT_FILE.read_text())
        except (OSError, json.JSONDecodeError):
            return {}
    return {}
def diff_citations(old: list[str], new: list[str]) -> dict:
    """Diff two citation lists.

    Returns a dict with 'added', 'removed', and 'unchanged' URL lists.
    Fixes two issues: the 'unchanged' bucket was dropped here although the
    walkthrough version of this helper documents it (run_monitor reads only
    'added'/'removed', so restoring it is backward-compatible), and raw
    set ordering made the output nondeterministic — each list is sorted.
    """
    old_set, new_set = set(old), set(new)
    return {
        'added': sorted(new_set - old_set),
        'removed': sorted(old_set - new_set),
        'unchanged': sorted(old_set & new_set),
    }
def run_monitor():
    """Check every configured query, print citation changes, save the snapshot.

    Robustness fix: a failure on one query (network error, bad response) no
    longer aborts the whole run and lose the snapshot update for every other
    query. The failing query carries its previous citations forward so no
    false 'removed' alerts fire on the next successful check.
    """
    snapshot = load_snapshot()
    new_snapshot = {}
    for query, config in MONITOR_CONFIG.items():
        # First run: previous defaults to [], so every citation shows as ADDED.
        previous = snapshot.get(query, [])
        try:
            current = fetch_citations(query)
        except requests.RequestException as exc:
            print(f'[{datetime.now().isoformat()}] ERROR fetching "{query}": {exc}')
            new_snapshot[query] = previous  # carry forward; avoid bogus diffs
            continue
        new_snapshot[query] = current
        changes = diff_citations(previous, current)
        if changes['added'] or changes['removed']:
            print(f'[{datetime.now().isoformat()}] Changes for: {query}')
            for url in changes['added']:
                # Label whether the moving URL is ours or a competitor's.
                label = 'MY URL' if url in config['my_urls'] else 'COMPETITOR'
                print(f' + ADDED ({label}): {url}')
            for url in changes['removed']:
                label = 'MY URL' if url in config['my_urls'] else 'COMPETITOR'
                print(f' - REMOVED ({label}): {url}')
    # Persist the fresh citation lists for the next run's diff.
    SNAPSHOT_FILE.write_text(json.dumps(new_snapshot, indent=2))
    print(f'Snapshot saved with {len(new_snapshot)} queries')
if __name__ == '__main__':
    run_monitor()
JavaScript Example
// API key is read from the environment; the literal is only a placeholder.
const API_KEY = process.env.SCAVIO_API_KEY || 'your_scavio_api_key';
const ENDPOINT = 'https://api.scavio.dev/api/v1/search';
const fs = require('fs');
// Previous run's citation lists, persisted between runs, keyed by query.
const SNAPSHOT_FILE = 'citation_snapshots.json';
// Queries to monitor, each with the URLs we own for it.
// NOTE(review): watchCompetitors is stored but never read by main() —
// presumably a hook for future filtering; confirm before relying on it.
const MONITOR_CONFIG = {
  'best crm for startups': {
    myUrls: ['https://mysite.com/crm-guide'],
    watchCompetitors: true
  },
  'how to automate lead scoring': {
    myUrls: ['https://mysite.com/lead-scoring'],
    watchCompetitors: true
  }
};
// Return the URLs cited by Google's AI Overview for the given query.
// Throws on any non-2xx HTTP status; queries with no AI Overview yield [].
async function fetchCitations(query) {
  const res = await fetch(ENDPOINT, {
    method: 'POST',
    headers: { 'x-api-key': API_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us' })
  });
  if (!res.ok) {
    throw new Error('HTTP ' + res.status);
  }
  const payload = await res.json();
  const citations = (payload.ai_overview || {}).citations || [];
  return citations.map(entry => entry.url || '');
}
// Load the previous {query: [citedUrls]} snapshot from disk.
// Returns {} on the first run, and also when the file is unreadable or
// corrupt (e.g. an interrupted previous run) — a bad file then degrades to
// "treat everything as new" instead of crashing every subsequent run.
function loadSnapshot() {
  if (!fs.existsSync(SNAPSHOT_FILE)) {
    return {};
  }
  try {
    return JSON.parse(fs.readFileSync(SNAPSHOT_FILE, 'utf-8'));
  } catch (err) {
    return {};
  }
}
// Diff two citation lists; returns { added, removed } arrays of URLs.
// Sets de-duplicate repeated URLs and keep insertion order.
function diffCitations(oldList, newList) {
  const previous = new Set(oldList);
  const current = new Set(newList);
  const added = [];
  for (const url of current) {
    if (!previous.has(url)) added.push(url);
  }
  const removed = [];
  for (const url of previous) {
    if (!current.has(url)) removed.push(url);
  }
  return { added, removed };
}
// Check every configured query, print citation changes, save the snapshot.
// Robustness fix: a failure on one query no longer aborts the whole run and
// lose the snapshot update for every other query; the failing query carries
// its previous citations forward so no false "removed" alerts fire later.
async function main() {
  const snapshot = loadSnapshot();
  const newSnapshot = {};
  for (const [query, config] of Object.entries(MONITOR_CONFIG)) {
    // First run: previous defaults to [], so every citation shows as ADDED.
    const previous = snapshot[query] || [];
    let current;
    try {
      current = await fetchCitations(query);
    } catch (err) {
      console.log('[' + new Date().toISOString() + '] ERROR fetching "' + query + '": ' + err.message);
      newSnapshot[query] = previous; // carry forward; avoid bogus diffs
      continue;
    }
    newSnapshot[query] = current;
    const changes = diffCitations(previous, current);
    if (changes.added.length || changes.removed.length) {
      console.log('[' + new Date().toISOString() + '] Changes for: ' + query);
      changes.added.forEach(url => {
        // Label whether the moving URL is ours or a competitor's.
        const label = config.myUrls.includes(url) ? 'MY URL' : 'COMPETITOR';
        console.log(' + ADDED (' + label + '): ' + url);
      });
      changes.removed.forEach(url => {
        const label = config.myUrls.includes(url) ? 'MY URL' : 'COMPETITOR';
        console.log(' - REMOVED (' + label + '): ' + url);
      });
    }
  }
  // Persist the fresh citation lists for the next run's diff.
  fs.writeFileSync(SNAPSHOT_FILE, JSON.stringify(newSnapshot, null, 2));
  console.log('Snapshot saved with ' + Object.keys(newSnapshot).length + ' queries');
}
main().catch(console.error);
Expected Output
{
"search_metadata": { "query": "best crm for startups", "country_code": "us" },
"ai_overview": {
"text": "The best CRM for startups depends on team size and budget...",
"citations": [
{ "url": "https://mysite.com/crm-guide", "title": "Top CRM Tools for Startups" },
{ "url": "https://competitor.com/crm-review", "title": "CRM Comparison 2026" }
]
}
}