Production AI agents using MCP servers need observability into tool-level performance. When search quality degrades, you need to know whether the MCP server is down, the upstream provider is rate-limited, or the agent is making poor tool selections. This tutorial builds a health monitoring system that checks all MCP tools, logs performance metrics, and alerts on degradation.
Prerequisites
- Python 3.8+ installed
- The requests library (pip install requests)
- A Scavio API key from scavio.dev, exported as SCAVIO_API_KEY (see the environment check below)
- A Slack webhook URL exported as SLACK_WEBHOOK_URL (optional, for alerts)
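Both credentials are read from the environment in every example below. Before wiring anything into cron, a quick preflight check can save a confusing silent failure; this is a minimal sketch, and the variable names are simply the ones the walkthrough code reads.
import os
# Preflight: confirm the environment variables the examples below rely on.
if not os.environ.get('SCAVIO_API_KEY'):
    raise SystemExit('SCAVIO_API_KEY is not set')
if not os.environ.get('SLACK_WEBHOOK_URL'):
    print('SLACK_WEBHOOK_URL is not set - Slack alerts will be skipped')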
Walkthrough
Step 1: Define the health check function
Check each search platform with a test query and measure latency and result count.
import requests, os, time, json
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
PLATFORMS = ['google', 'reddit', 'youtube', 'amazon', 'walmart']
def health_check_all() -> dict:
    # One timestamped run; each platform gets its own status entry.
    report = {'timestamp': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}  # UTC, matching the trailing Z
    for platform in PLATFORMS:
        start = time.time()
        try:
            resp = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
                                 json={'platform': platform, 'query': 'test query'}, timeout=15)
            latency = round(time.time() - start, 3)
            data = resp.json()
            result_count = len(data.get('organic', []))
            report[platform] = {'status': 'ok', 'latency_s': latency,
                                'results': result_count, 'http_code': resp.status_code}
        except requests.Timeout:
            report[platform] = {'status': 'timeout', 'latency_s': round(time.time() - start, 3)}
        except Exception as e:
            report[platform] = {'status': 'error', 'error': str(e)}
    return report
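Running the check once and printing the report shows the per-platform shape the later steps consume. A minimal sketch; the commented values are illustrative, not captured API output.
report = health_check_all()
print(json.dumps(report, indent=2))
# Each healthy platform maps to something like:
# {'status': 'ok', 'latency_s': 1.2, 'results': 10, 'http_code': 200}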
Step 2: Set alerting thresholds
Define what constitutes degraded performance for each metric.
THRESHOLDS = {
    'max_latency_s': 5.0,
    'min_results': 1,
    'alert_on_error': True,
}
def check_alerts(report: dict) -> list:
    alerts = []
    for platform in PLATFORMS:
        data = report.get(platform, {})
        status = data.get('status')
        if status in ('error', 'timeout'):
            if THRESHOLDS['alert_on_error']:
                alerts.append(f'{platform}: {status} - {data.get("error", "timeout")}')
        elif data.get('latency_s', 0) > THRESHOLDS['max_latency_s']:
            alerts.append(f'{platform}: high latency {data["latency_s"]}s')
        elif data.get('results', 0) < THRESHOLDS['min_results']:
            alerts.append(f'{platform}: low results ({data["results"]})')
    return alerts
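Before wiring this to Slack, the threshold logic can be sanity-checked against a hand-built report; every value below is made up for illustration.
fake_report = {
    'timestamp': '2024-01-01T00:00:00Z',
    'google':  {'status': 'ok', 'latency_s': 1.1, 'results': 10, 'http_code': 200},
    'reddit':  {'status': 'ok', 'latency_s': 7.4, 'results': 8, 'http_code': 200},   # above max_latency_s
    'youtube': {'status': 'timeout', 'latency_s': 15.0},
    'amazon':  {'status': 'ok', 'latency_s': 0.9, 'results': 0, 'http_code': 200},   # below min_results
    'walmart': {'status': 'ok', 'latency_s': 1.3, 'results': 12, 'http_code': 200},
}
print(check_alerts(fake_report))  # expect alerts for reddit, youtube, and amazon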
Step 3: Send alerts to Slack
Forward alerts to a Slack channel for on-call visibility.
SLACK_WEBHOOK = os.environ.get('SLACK_WEBHOOK_URL', '')
def send_slack_alert(alerts: list):
    if not alerts or not SLACK_WEBHOOK:
        return
    text = 'MCP Search Health Alert:\n' + '\n'.join(f'- {a}' for a in alerts)
    requests.post(SLACK_WEBHOOK, json={'text': text}, timeout=5)
# Run and alert:
report = health_check_all()
alerts = check_alerts(report)
if alerts:
    send_slack_alert(alerts)
    print(f'ALERTS: {alerts}')
else:
    print('All platforms healthy')
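One practical detail: the alerting path itself can fail (expired webhook, Slack outage), and that failure should not take the health check down with it. Below is a hardened variant sketched under that assumption; send_slack_alert_safe is a hypothetical name, not part of the tutorial's API.
def send_slack_alert_safe(alerts: list):
    # Same payload as send_slack_alert, but a failed post is logged, not raised.
    if not alerts or not SLACK_WEBHOOK:
        return
    text = 'MCP Search Health Alert:\n' + '\n'.join(f'- {a}' for a in alerts)
    try:
        resp = requests.post(SLACK_WEBHOOK, json={'text': text}, timeout=5)
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f'Slack alert failed: {e}')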
Step 4: Log health data for trending
Append each health check to a JSONL file for historical analysis.
def log_health(report: dict, filepath: str = 'mcp_health.jsonl'):
    with open(filepath, 'a') as f:
        f.write(json.dumps(report) + '\n')
# Run as cron: */5 * * * * python mcp_health_check.py
report = health_check_all()
log_health(report)
alerts = check_alerts(report)
if alerts:
    send_slack_alert(alerts)
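Once the log has accumulated a few days of entries, it can be read back to spot gradual latency drift. A minimal standard-library sketch follows; latency_trend is a hypothetical helper, and it assumes the mcp_health.jsonl format written by log_health above.
def latency_trend(filepath: str = 'mcp_health.jsonl') -> dict:
    # Average latency per platform across every logged health check.
    totals, counts = {}, {}
    with open(filepath) as f:
        for line in f:
            entry = json.loads(line)
            for platform in PLATFORMS:
                latency = entry.get(platform, {}).get('latency_s')
                if latency is not None:
                    totals[platform] = totals.get(platform, 0.0) + latency
                    counts[platform] = counts.get(platform, 0) + 1
    return {p: round(totals[p] / counts[p], 3) for p in totals}

print(latency_trend())  # e.g. {'google': 1.24, 'reddit': 2.01, ...} (illustrative)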
Python Example
import requests, os, time
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
def quick_health():
    for p in ['google', 'reddit', 'youtube', 'amazon', 'walmart']:
        start = time.time()
        try:
            r = requests.post('https://api.scavio.dev/api/v1/search', headers=H,
                              json={'platform': p, 'query': 'test'}, timeout=10)
            print(f'{p:10s} {r.status_code} {time.time()-start:.2f}s {len(r.json().get("organic",[]))} results')
        except Exception as e:
            print(f'{p:10s} ERROR {e}')
quick_health()
JavaScript Example
async function quickHealth() {
  for (const p of ['google', 'reddit', 'youtube', 'amazon', 'walmart']) {
    const start = Date.now();
    try {
      const r = await fetch('https://api.scavio.dev/api/v1/search', {
        method: 'POST', headers: {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'},
        body: JSON.stringify({platform: p, query: 'test'})
      });
      const data = await r.json();
      console.log(`${p.padEnd(10)} ${r.status} ${Date.now()-start}ms ${(data.organic||[]).length} results`);
    } catch (e) { console.log(`${p.padEnd(10)} ERROR ${e.message}`); }
  }
}
quickHealth();
Expected Output
A health monitoring system that checks MCP search tools every 5 minutes via cron, posts a Slack alert on errors, timeouts, high latency, or empty result sets, and appends every report to mcp_health.jsonl for trend analysis.