AI Overview citations determine which websites get visibility in Google's AI-generated answers. Tracking these citations over time reveals which domains are gaining or losing AI search visibility, which content formats get cited most, and how Google's citation patterns change. This tutorial shows how to build an automated AEO citation tracker that monitors a set of keywords, records which domains appear in AI Overview sources, and tracks citation frequency over time. You will create a pipeline that runs daily and generates trend reports.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
- A list of keywords to monitor for AI Overview citations
Walkthrough
Step 1: Set up the tracker
Define the keywords and storage for citation tracking.
import os, requests, json
from datetime import date
from collections import Counter
API_KEY = os.environ["SCAVIO_API_KEY"]
TRACKER_FILE = "aeo_citations.json"
KEYWORDS = [
"what is RAG in AI",
"best CRM for small business",
"how to automate email outreach",
"python web scraping alternatives",
]Step 2: Extract AI Overview citations
Query each keyword and extract the domains cited in AI Overviews.
def extract_citations(keyword):
resp = requests.post("https://api.scavio.dev/api/v1/search",
headers={"x-api-key": API_KEY},
json={"platform": "google", "query": keyword})
data = resp.json()
aio = data.get("ai_overview", {})
sources = aio.get("sources", []) if aio else []
from urllib.parse import urlparse
domains = [urlparse(s.get("link", "")).netloc for s in sources if s.get("link")]
return {
"keyword": keyword,
"has_aio": bool(aio),
"citation_count": len(sources),
"domains": domains,
"urls": [s.get("link", "") for s in sources],
"date": date.today().isoformat(),
}Step 3: Build the citation database
Store citation data over time to enable trend analysis.
def load_tracker():
    """Load the accumulated citation snapshots; an absent tracker file yields []."""
    try:
        fh = open(TRACKER_FILE)
    except FileNotFoundError:
        return []
    with fh:
        return json.load(fh)
def save_snapshot(keywords):
tracker = load_tracker()
snapshot = {
"date": date.today().isoformat(),
"citations": [extract_citations(kw) for kw in keywords],
}
all_domains = []
for c in snapshot["citations"]:
all_domains.extend(c["domains"])
snapshot["top_domains"] = Counter(all_domains).most_common(10)
tracker.append(snapshot)
with open(TRACKER_FILE, "w") as f:
json.dump(tracker, f, indent=2)
return snapshotStep 4: Generate trend report
Compare snapshots to detect citation changes and domain ranking shifts.
def trend_report():
tracker = load_tracker()
if len(tracker) < 2:
return {"status": "need at least 2 snapshots"}
current = tracker[-1]
previous = tracker[-2]
curr_domains = Counter()
prev_domains = Counter()
for c in current["citations"]:
curr_domains.update(c["domains"])
for c in previous["citations"]:
prev_domains.update(c["domains"])
gainers = {d: curr_domains[d] - prev_domains.get(d, 0)
for d in curr_domains if curr_domains[d] > prev_domains.get(d, 0)}
losers = {d: prev_domains[d] - curr_domains.get(d, 0)
for d in prev_domains if prev_domains[d] > curr_domains.get(d, 0)}
return {
"period": f"{previous['date']} -> {current['date']}",
"gainers": dict(sorted(gainers.items(), key=lambda x: -x[1])[:5]),
"losers": dict(sorted(losers.items(), key=lambda x: -x[1])[:5]),
}Step 5: Track specific domain performance
Monitor how a specific domain's citation count changes over time.
def domain_performance(domain, tracker=None):
    """Return per-snapshot citation counts for *domain* (including its subdomains).

    Fixes the original substring test (`domain in d`), which also counted
    unrelated domains such as "notexample.com" for "example.com"; matching is
    now exact or dot-separated subdomain suffix.

    Args:
        domain: Registered domain to track, e.g. "example.com".
        tracker: Optional pre-loaded snapshot list; defaults to reading
            TRACKER_FILE via load_tracker().

    Returns:
        A list of {"date": ..., "citations": int} dicts, one per snapshot.
    """
    if tracker is None:
        tracker = load_tracker()
    suffix = "." + domain
    performance = []
    for snapshot in tracker:
        count = sum(
            1
            for c in snapshot["citations"]
            for d in c["domains"]
            if d == domain or d.endswith(suffix)
        )
        performance.append({"date": snapshot["date"], "citations": count})
    return performance
perf = domain_performance("example.com")
for p in perf:
print(f"{p['date']}: {p['citations']} citations")Python Example
import os, requests
API_KEY = os.environ["SCAVIO_API_KEY"]
def citations(keyword):
resp = requests.post("https://api.scavio.dev/api/v1/search",
headers={"x-api-key": API_KEY},
json={"platform": "google", "query": keyword})
aio = resp.json().get("ai_overview", {})
sources = aio.get("sources", []) if aio else []
return {"keyword": keyword, "count": len(sources),
"domains": [s.get("link","")[:50] for s in sources[:3]]}
print(citations("what is RAG in AI"))JavaScript Example
const H = {"x-api-key": process.env.SCAVIO_API_KEY, "Content-Type": "application/json"};
async function citations(keyword) {
const r = await fetch("https://api.scavio.dev/api/v1/search", {
method: "POST", headers: H,
body: JSON.stringify({platform: "google", query: keyword})
});
const aio = (await r.json()).ai_overview || {};
const sources = aio.sources || [];
return {keyword, count: sources.length, domains: sources.slice(0,3).map(s=>s.link)};
}
citations("what is RAG in AI").then(console.log);Expected Output
An automated citation tracking pipeline that monitors AI Overview sources daily, stores citation history, identifies trending domains, and generates domain-level performance reports.