Content gap analysis identifies topics your audience searches for but your site does not cover. Google's People Also Ask (PAA) boxes reveal the follow-up questions users have after searching for a topic. By comparing PAA questions across your target keywords against your existing content, you can identify gaps and generate content briefs for missing topics. This tutorial builds an automated content gap analyzer that fetches PAA data for a keyword set, clusters questions by theme, and outputs a prioritized list of content opportunities.
Prerequisites
- Python 3.10 or higher
- requests library installed
- A Scavio API key
- A list of target keywords to analyze
Walkthrough
Step 1: Fetch PAA data for target keywords
Query each keyword through the Scavio API and collect the People Also Ask questions.
def get_paa_questions(keyword: str) -> list[str]:
r = requests.post(
"https://api.scavio.dev/api/v1/search",
headers={"x-api-key": API_KEY},
json={"query": keyword, "country_code": "us"}
)
r.raise_for_status()
paa = r.json().get("people_also_ask", [])
return [item["question"] for item in paa]Step 2: Collect questions across all keywords
Build a master list of all PAA questions, tracking which keyword triggered each question.
import time
def collect_all_paa(keywords: list[str]) -> list[dict]:
all_questions = []
for kw in keywords:
questions = get_paa_questions(kw)
for q in questions:
all_questions.append({"question": q, "seed_keyword": kw})
time.sleep(0.5)
return all_questionsStep 3: Deduplicate and cluster questions
Remove duplicate questions, then group related ones by their shared leading words (for example, "what is" or "how do").
from collections import defaultdict
def cluster_questions(questions: list[dict]) -> dict[str, list[str]]:
seen = set()
unique = []
for q in questions:
normalized = q["question"].lower().strip("?")
if normalized not in seen:
seen.add(normalized)
unique.append(q)
clusters = defaultdict(list)
for q in unique:
words = q["question"].lower().split()
topic = words[0] + " " + words[1] if len(words) > 1 else words[0]
clusters[topic].append(q["question"])
return dict(clusters)Step 4: Generate content briefs
For each content gap, generate a brief that includes the target question, related questions, and the seed keyword it came from.
def generate_briefs(questions: list[dict]) -> list[dict]:
briefs = []
for q in questions[:20]:
brief = {
"target_question": q["question"],
"seed_keyword": q["seed_keyword"],
"suggested_title": q["question"].rstrip("?") + " - Complete Guide",
"content_type": "guide" if "how" in q["question"].lower() else "explainer",
}
briefs.append(brief)
return briefsPython Example
import os
import json
import time
import requests
from collections import defaultdict
# API key from the environment; the placeholder fallback keeps the script
# importable for demonstration but will be rejected by the API.
API_KEY = os.environ.get("SCAVIO_API_KEY", "your_scavio_api_key")
# Scavio search endpoint that returns People Also Ask data.
ENDPOINT = "https://api.scavio.dev/api/v1/search"
# Seed keywords whose PAA questions are mined for content gaps.
KEYWORDS = ["vector database", "rag pipeline", "embedding model", "ai agent framework"]
def get_paa(kw: str) -> list[str]:
    """Return the People Also Ask questions for a single keyword.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the request exceeds the timeout.
    """
    r = requests.post(ENDPOINT, headers={"x-api-key": API_KEY},
                      json={"query": kw, "country_code": "us"},
                      timeout=30)  # fail fast rather than hang on a stalled request
    r.raise_for_status()
    return [item["question"] for item in r.json().get("people_also_ask", [])]
def analyze():
    """Collect de-duplicated PAA questions across all seed keywords.

    Prints each gap with its seed keyword and returns the list of
    {"question": ..., "seed": ...} dicts.
    """
    gaps = []
    seen_questions = set()
    for keyword in KEYWORDS:
        for question in get_paa(keyword):
            key = question.lower()
            if key in seen_questions:
                continue  # case-insensitive de-duplication across keywords
            seen_questions.add(key)
            gaps.append({"question": question, "seed": keyword})
        time.sleep(0.5)  # be polite to the API between keywords
    print(f"Found {len(gaps)} unique content gaps:")
    for gap in gaps:
        print(f" [{gap['seed']}] {gap['question']}")
    return gaps
if __name__ == "__main__":
gaps = analyze()
with open("content_gaps.json", "w") as f:
json.dump(gaps, f, indent=2)JavaScript Example
// API key from the environment; the placeholder fallback is for demonstration
// only and will be rejected by the API.
const API_KEY = process.env.SCAVIO_API_KEY || "your_scavio_api_key";
// Scavio search endpoint that returns People Also Ask data.
const ENDPOINT = "https://api.scavio.dev/api/v1/search";
/**
 * Fetch the People Also Ask questions for a single keyword.
 * @param {string} kw - The search query to submit.
 * @returns {Promise<string[]>} PAA question strings (empty when none).
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function getPAA(kw) {
  const res = await fetch(ENDPOINT, {
    method: "POST",
    headers: { "x-api-key": API_KEY, "Content-Type": "application/json" },
    body: JSON.stringify({ query: kw, country_code: "us" })
  });
  // fetch() only rejects on network failures; surface HTTP errors explicitly
  // instead of silently parsing an error body into an empty question list.
  if (!res.ok) {
    throw new Error(`Scavio API request failed: ${res.status} ${res.statusText}`);
  }
  const data = await res.json();
  return (data.people_also_ask || []).map(item => item.question);
}
async function main() {
const keywords = ["vector database", "rag pipeline", "embedding model"];
const seen = new Set();
const gaps = [];
for (const kw of keywords) {
const questions = await getPAA(kw);
for (const q of questions) {
if (!seen.has(q.toLowerCase())) {
seen.add(q.toLowerCase());
gaps.push({ question: q, seed: kw });
}
}
}
console.log(`${gaps.length} content gaps found:`);
gaps.forEach(g => console.log(` [${g.seed}] ${g.question}`));
}
main().catch(console.error);Expected Output
Found 16 unique content gaps:
[vector database] What is the best vector database in 2026?
[vector database] How does a vector database differ from a relational database?
[rag pipeline] What are the components of a RAG pipeline?
[rag pipeline] How do you evaluate RAG performance?
[embedding model] What is the difference between embeddings and fine-tuning?
[ai agent framework] What is the best AI agent framework for production?