Improve AI agent research accuracy by implementing multi-query search expansion, cross-referencing results across sources, scoring confidence based on source agreement, and injecting only high-confidence data into the agent context. Agents that search once and use the first result frequently produce inaccurate or incomplete research. A multi-query approach searches for the same topic from different angles, compares results, and only surfaces information that appears consistently across sources. This dramatically reduces hallucination from single-source reliance.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
- An AI agent that performs research tasks
Walkthrough
Step 1: Expand query to multiple angles
Generate multiple search queries from different angles for the same research topic.
import os, requests, time
API_KEY = os.environ['SCAVIO_API_KEY']
def expand_queries(topic: str) -> list:
    """Build a list of search queries covering several angles on *topic*.

    The bare topic is always first, followed by four qualified variants
    that probe overviews, comparisons, trade-offs, and alternatives.
    """
    suffixes = ['', ' overview', ' comparison 2026', ' pros cons', ' alternatives']
    return [f'{topic}{suffix}' for suffix in suffixes]
def search(query: str) -> list:
    """Run a single Google search through the Scavio API.

    Args:
        query: The search string to submit.

    Returns:
        The list of organic results, or an empty list when the response
        contains none.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.Timeout: If the request exceeds 15 seconds.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY},
        json={'platform': 'google', 'query': query},
        timeout=15,
    )
    # Fail loudly on HTTP errors instead of silently parsing an error body
    # as if it were a results payload.
    resp.raise_for_status()
    return resp.json().get('organic_results', [])
queries = expand_queries('vector database')
print(f'Expanded to {len(queries)} queries:')
for q in queries:
print(f' - {q}')

Step 2: Search all angles
Execute all expanded queries and collect results.
def multi_search(topic: str) -> dict:
    """Search every expanded angle of *topic* and collect top results per query.

    Returns a mapping of query string -> up to 5 organic results.
    """
    expanded = expand_queries(topic)
    collected = {}
    for q in expanded:
        collected[q] = search(q)[:5]  # keep only the top 5 hits per angle
        time.sleep(0.3)               # light pacing between API calls
    hits = sum(len(v) for v in collected.values())
    print(f'Searched {len(expanded)} queries, got {hits} total results')
    return collected
results = multi_search('vector database')
for query, items in results.items():
print(f' {query[:40]}: {len(items)} results')

Step 3: Cross-reference sources
Find information that appears consistently across multiple search results.
from collections import Counter
from urllib.parse import urlparse
def cross_reference(all_results: dict) -> list:
    """Rank sources by how often their domain recurs across all query results.

    A domain appearing at least twice across the expanded-query results is
    treated as corroborated; the first-seen title/snippet/url for each
    domain is kept. Returns corroborated entries ordered by hit count,
    highest first, each annotated with 'cross_ref_count' and 'domain'.
    """
    tally = Counter()
    first_seen = {}  # domain -> metadata from its first occurrence
    for hits in all_results.values():
        for hit in hits:
            host = urlparse(hit.get('link', '')).netloc
            if not host:
                continue
            tally[host] += 1
            first_seen.setdefault(host, {
                'title': hit.get('title', ''),
                'snippet': hit.get('snippet', ''),
                'url': hit.get('link', ''),
            })
    # Sources appearing in multiple queries are more reliable
    corroborated = []
    for host, count in tally.most_common():
        if count >= 2:  # Appeared in at least 2 query results
            entry = first_seen[host]
            entry['cross_ref_count'] = count
            entry['domain'] = host
            corroborated.append(entry)
    return corroborated
reliable = cross_reference(results)
print(f'Cross-referenced sources: {len(reliable)}')
for r in reliable[:3]:
print(f' [{r["cross_ref_count"]}x] {r["domain"]}: {r["title"][:50]}')

Step 4: Score confidence
Assign confidence scores to research findings based on source agreement and quality.
def score_confidence(reliable_sources: list, all_results: dict) -> list:
    """Score each cross-referenced source 0-100 based on agreement and quality.

    Args:
        reliable_sources: Output of cross_reference(); each dict is mutated
            in place to gain a 'confidence' key.
        all_results: The raw multi_search() output. Kept for interface
            compatibility; not currently used in scoring. (The original
            computed len(all_results) into an unused local — removed.)

    Returns:
        The sources sorted by confidence, highest first.
    """
    scored = []
    for source in reliable_sources:
        score = 0
        # Cross-reference score (max 40): 10 points per corroborating hit.
        score += min(source['cross_ref_count'] * 10, 40)
        # Snippet quality (max 20): longer snippets carry more signal.
        snippet_len = len(source.get('snippet', ''))
        if snippet_len > 100:
            score += 20
        elif snippet_len > 50:
            score += 10
        # Domain authority heuristic (max 20).
        trusted = ['wikipedia', 'github', 'stackoverflow', 'docs.']
        if any(t in source.get('domain', '').lower() for t in trusted):
            score += 20
        # Title relevance (max 20): any non-empty title counts.
        if source.get('title'):
            score += 20
        source['confidence'] = min(score, 100)
        scored.append(source)
    scored.sort(key=lambda x: x['confidence'], reverse=True)
    return scored
scored = score_confidence(reliable, results)
for s in scored[:5]:
print(f' [{s["confidence"]}%] {s["title"][:50]}')

Step 5: Build grounded research context
Format high-confidence findings into a context block for the agent.
def research_context(topic: str, min_confidence: int = 40) -> str:
    """Run the full pipeline and format high-confidence findings for an agent.

    Searches the topic from multiple angles, cross-references and scores
    sources, then renders up to five findings at or above *min_confidence*
    into a plain-text context block.
    """
    searched = multi_search(topic)
    corroborated = cross_reference(searched)
    ranked = score_confidence(corroborated, searched)
    keep = [s for s in ranked if s['confidence'] >= min_confidence]
    if not keep:
        return f'No high-confidence sources found for "{topic}".'
    lines = [
        f'RESEARCH CONTEXT: {topic}',
        f'Sources: {len(keep)} (confidence >= {min_confidence}%)',
        '',
    ]
    for s in keep[:5]:
        lines.append(f'[{s["confidence"]}% confidence] {s["title"]}')
        lines.append(f' Source: {s["domain"]}')
        if s.get('snippet'):
            lines.append(f' Summary: {s["snippet"][:200]}')
        lines.append('')
    return '\n'.join(lines)
context = research_context('vector database')
print(context[:500])

Python Example
import requests, os
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
def research(topic):
    """Minimal multi-angle research: collect short snippets for three query angles.

    Returns a list of up to 9 snippet strings (3 queries x top 3 results),
    each truncated to 100 characters.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.Timeout: If a request exceeds 15 seconds.
    """
    queries = [topic, f'{topic} comparison', f'{topic} overview']
    all_snippets = []
    for q in queries:
        resp = requests.post(
            'https://api.scavio.dev/api/v1/search',
            headers=H,
            json={'platform': 'google', 'query': q},
            timeout=15,  # match the main pipeline; without it the call can hang forever
        )
        resp.raise_for_status()  # surface HTTP errors instead of parsing an error body
        for r in resp.json().get('organic_results', [])[:3]:
            all_snippets.append(r.get('snippet', '')[:100])
    return all_snippets
print(research('vector database'))

JavaScript Example
const H = {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'};
async function research(topic) {
  // Query the same topic from three angles and gather short snippets.
  const queries = [topic, `${topic} comparison`, `${topic} overview`];
  const snippets = [];
  for (const q of queries) {
    const r = await fetch('https://api.scavio.dev/api/v1/search', {
      method: 'POST', headers: H,
      body: JSON.stringify({platform: 'google', query: q})
    });
    // Surface HTTP failures instead of reading an error body as results.
    if (!r.ok) throw new Error(`Search failed with status ${r.status} for query: ${q}`);
    const results = (await r.json()).organic_results || [];
    // Keep the top 3 results per angle, snippets truncated to 100 chars.
    results.slice(0, 3).forEach(r => snippets.push((r.snippet || '').slice(0, 100)));
  }
  return snippets;
}
research('vector database').then(console.log);

Expected Output
A multi-query research pipeline that expands topics into multiple search angles, cross-references sources, scores confidence, and outputs only high-confidence grounded research.