MCP 工具描述是隐藏成本。您的代理可以调用的每个工具都会向系统提示添加描述,在用户说出任何内容之前消耗上下文令牌。对于多个 MCP 服务器,工具描述可能会占用 10-20% 的上下文窗口。本教程构建了一个令牌审核工具,该工具可以准确测量每个 MCP 服务器消耗的令牌数量、识别最严重的违规者,并展示如何切换到 Scavio (mcp.scavio.dev/mcp) 将 6 个平台整合到一台服务器中。
前置条件
- 已安装 Python 3.9+
- 安装 tiktoken 库(pip install tiktoken)
- 可以访问您的 MCP 配置文件
- 用于对比的 Scavio API 密钥(可在 scavio.dev 获取)
操作指南
步骤 1: 加载并解析您的 MCP 配置
读取您的 MCP 配置文件并列出所有配置的服务器及其工具数量。
import json, os
def load_mcp_configs() -> dict:
    """Load MCP server definitions from the known config file locations.

    Checks the Claude Desktop, Cursor and project-local config paths and
    reads every file that exists.

    Returns:
        A dict keyed by location name ('claude_desktop', 'cursor',
        'project'). Each value holds the file 'path', the 'servers'
        mapping read from the file's 'mcpServers' key, and the
        'server_count'. Locations whose file does not exist are omitted.

    Raises:
        json.JSONDecodeError: If an existing config file is not valid JSON.
    """
    paths = {
        'claude_desktop': os.path.expanduser('~/.config/claude/claude_desktop_config.json'),
        'cursor': os.path.expanduser('~/.cursor/mcp.json'),
        'project': '.mcp.json',
    }
    configs = {}
    for name, path in paths.items():
        if not os.path.exists(path):
            continue
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
        # `or {}` also covers an explicit `"mcpServers": null`, which
        # `.get(key, {})` would pass through as None and break len().
        servers = data.get('mcpServers') or {}
        configs[name] = {
            'path': path,
            'servers': servers,
            'server_count': len(servers),
        }
    return configs
configs = load_mcp_configs()
for name, cfg in configs.items():
print(f'{name} ({cfg["path"]}):')
print(f' Servers: {cfg["server_count"]}')
for server_name in cfg['servers']:
print(f' - {server_name}')
步骤 2: 估计每台服务器的令牌成本
使用 tiktoken 统计工具描述中的 token。每个工具都有一个名称、描述和参数架构,它们都消耗令牌。
import tiktoken
def count_tokens(text: str, model: str = 'gpt-4o') -> int:
    """Return the number of tokens `text` encodes to for `model`.

    Args:
        text: The text to tokenize (e.g. a tool name, description or schema).
        model: Model name used to pick the tiktoken encoding.

    Returns:
        The token count. For model names tiktoken does not recognise the
        count is an approximation based on the `o200k_base` encoding.
    """
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken only maps OpenAI model names; fall back to a fixed
        # encoding so other model names give an estimate instead of crashing.
        enc = tiktoken.get_encoding('o200k_base')
    return len(enc.encode(text))
# Typical tool description sizes (measured from real MCP servers).
# 'tools' is the number of tools a server exposes and 'avg_desc_tokens' the
# average token count of one tool description.
SERVER_ESTIMATES = {
    'filesystem': {'tools': 11, 'avg_desc_tokens': 180},
    'github': {'tools': 25, 'avg_desc_tokens': 250},
    'slack': {'tools': 12, 'avg_desc_tokens': 200},
    'postgres': {'tools': 8, 'avg_desc_tokens': 220},
    'memory': {'tools': 5, 'avg_desc_tokens': 150},
    'brave-search': {'tools': 2, 'avg_desc_tokens': 300},
    'scavio': {'tools': 3, 'avg_desc_tokens': 250},
    'tavily': {'tools': 2, 'avg_desc_tokens': 280},
    'puppeteer': {'tools': 8, 'avg_desc_tokens': 200},
    'sequential-thinking': {'tools': 1, 'avg_desc_tokens': 400},
}

# Fallback used for servers that are not listed in SERVER_ESTIMATES.
_DEFAULT_TOOL_COUNT = 5
_DEFAULT_AVG_DESC_TOKENS = 200


def estimate_server_tokens(server_name: str) -> int:
    """Estimate the token cost of one MCP server's tool descriptions.

    Args:
        server_name: Server key as it appears in the MCP config.

    Returns:
        tools x average description tokens for a known server, otherwise
        the default estimate of 5 tools x 200 tokens.
    """
    est = SERVER_ESTIMATES.get(server_name)
    if est is None:
        return _DEFAULT_TOOL_COUNT * _DEFAULT_AVG_DESC_TOKENS
    return est['tools'] * est['avg_desc_tokens']
# Audit all servers, most expensive first
print('MCP Server Token Audit')
print('=' * 50)
total = 0
ranked = sorted(
    SERVER_ESTIMATES.items(),
    key=lambda item: item[1]['tools'] * item[1]['avg_desc_tokens'],
    reverse=True,
)
for name, est in ranked:
    tokens = est['tools'] * est['avg_desc_tokens']
    total += tokens  # running sum, printed as the TOTAL row after the loop
    print(f'{name:25s} {est["tools"]:3d} tools x {est["avg_desc_tokens"]:3d} tok = {tokens:5,} tokens')
print(f'{"TOTAL":25s} {"":>18s} {total:5,} tokens')
步骤 3: 计算工具描述令牌的成本
将令牌数量转换为实际的美元成本。按照目前的 LLM 定价,当您每天发出数百个请求时,工具描述令牌的成本会迅速累积。
def cost_analysis(servers: list[str], requests_per_day: int = 100) -> dict:
    """Calculate the real cost of MCP tool description tokens.

    Prints the per-request token overhead and the monthly cost per model,
    assuming a 30-day month and that the full overhead is sent as input
    tokens on every request.

    Args:
        servers: Names of the MCP servers that are loaded on each request.
        requests_per_day: Number of LLM requests made per day.

    Returns:
        A dict with 'total_tokens_per_request', 'servers', and one entry
        per model name holding the monthly cost in USD.
    """
    days_per_month = 30
    total_tokens = sum(estimate_server_tokens(s) for s in servers)
    # LLM input token pricing (2026 rates), in USD per single token
    pricing = {
        'gpt-4o': 2.50 / 1_000_000,
        'gpt-4o-mini': 0.15 / 1_000_000,
        'claude-sonnet-4': 3.00 / 1_000_000,
        'claude-opus-4': 15.00 / 1_000_000,
    }
    results = {'total_tokens_per_request': total_tokens, 'servers': servers}
    print(f'Servers: {", ".join(servers)}')
    print(f'Token overhead per request: {total_tokens:,}')
    print(f'\nMonthly cost at {requests_per_day} requests/day:')
    for model, rate in pricing.items():
        daily = total_tokens * requests_per_day * rate
        monthly = daily * days_per_month
        print(f' {model:25s} ${monthly:6.2f}/month')
        results[model] = monthly
    return results
# Full global config: all servers
# Baseline: every server in SERVER_ESTIMATES is loaded, at 200 requests/day.
print('GLOBAL CONFIG (all servers):')
full = cost_analysis(list(SERVER_ESTIMATES.keys()), requests_per_day=200)
print()
# Scoped config: just Scavio
# Same request volume, so the two result dicts can be compared per model.
print('SCOPED CONFIG (Scavio only):')
scoped = cost_analysis(['scavio'], requests_per_day=200)
print()
print(f'Monthly savings with Claude Sonnet: '
f'${full.get("claude-sonnet-4",0) - scoped.get("claude-sonnet-4",0):.2f}')
步骤 4: 生成优化建议
分析您当前的设置并推荐每个项目的服务器范围。确定可以由 Scavio 替换的服务器以进行整合。
def optimize_mcp(current_servers: list[str]) -> dict:
    """Generate MCP optimization recommendations.

    Every server that Scavio can replace is dropped and a single 'scavio'
    entry takes the place of the first one. Prints the recommendations and
    the before/after token totals.

    Args:
        current_servers: Names of the MCP servers currently configured.

    Returns:
        A dict with the 'before' and 'after' token totals and the list of
        'recommendations'. Each recommendation's 'token_savings' is the
        full cost of the removed server; the net saving, which accounts
        for the added Scavio server, is before - after.
    """
    # Servers that Scavio can replace
    replaceable = {
        'brave-search': 'Scavio covers web search + 5 more platforms',
        'tavily': 'Scavio provides similar web search at $0.005/credit',
        'google-search': 'Scavio includes Google search',
    }
    recommendations = []
    new_servers = []
    # If Scavio is already configured it is kept at its own position, so a
    # replacement must not append a second copy and double-count its tokens.
    scavio_added = 'scavio' in current_servers
    for s in current_servers:
        if s not in replaceable:
            new_servers.append(s)
            continue
        recommendations.append({
            'action': 'REPLACE',
            'server': s,
            'reason': replaceable[s],
            'token_savings': estimate_server_tokens(s),
        })
        if not scavio_added:
            new_servers.append('scavio')
            scavio_added = True
    tokens_before = sum(estimate_server_tokens(s) for s in current_servers)
    tokens_after = sum(estimate_server_tokens(s) for s in new_servers)
    print('MCP Optimization Recommendations')
    print('=' * 50)
    for rec in recommendations:
        print(f'REPLACE {rec["server"]} -> Scavio')
        print(f' Reason: {rec["reason"]}')
        print(f' Token savings: {rec["token_savings"]:,}')
    print(f'\nBefore: {tokens_before:,} tokens ({len(current_servers)} servers)')
    print(f'After: {tokens_after:,} tokens ({len(new_servers)} servers)')
    print(f'Saved: {tokens_before - tokens_after:,} tokens/request')
    return {'before': tokens_before, 'after': tokens_after, 'recommendations': recommendations}
optimize_mcp(['filesystem', 'github', 'brave-search', 'tavily', 'slack', 'memory'])
Python 示例
import json, os
# Estimated tool-description tokens per MCP server.
SERVER_TOKEN_EST = {
    'filesystem': 1980, 'github': 6250, 'slack': 2400,
    'brave-search': 600, 'scavio': 750, 'tavily': 560,
    'memory': 750, 'postgres': 1760,
}


def audit_mcp(config_path='.mcp.json'):
    """Print the estimated tool-description token overhead of an MCP config.

    Args:
        config_path: Path to a JSON config file with an 'mcpServers' mapping.

    Prints one line per configured server (servers missing from
    SERVER_TOKEN_EST count as 1000 tokens), the per-request total, and the
    monthly cost at 100 requests/day for 30 days at $3 per million input
    tokens. Prints a "not found" message and returns if the file is missing.
    """
    default_tokens = 1000
    requests_per_day = 100
    days_per_month = 30
    usd_per_million_tokens = 3

    if not os.path.exists(config_path):
        print(f'{config_path} not found')
        return
    with open(config_path, encoding='utf-8') as f:
        # `or {}` also covers an explicit `"mcpServers": null`.
        servers = json.load(f).get('mcpServers') or {}
    total = 0
    for name in servers:
        tokens = SERVER_TOKEN_EST.get(name, default_tokens)
        total += tokens
        print(f' {name}: ~{tokens:,} tokens')
    monthly = total * requests_per_day * days_per_month * usd_per_million_tokens / 1e6
    print(f'Total: ~{total:,} tokens per request')
    print(f'Monthly cost (Sonnet, 100 req/day): ${monthly:.2f}')
audit_mcp()
JavaScript 示例
const fs = require('fs');
// Estimated tool-description tokens per MCP server.
const TOKEN_EST = {
  filesystem: 1980, github: 6250, slack: 2400,
  'brave-search': 600, scavio: 750, tavily: 560,
  memory: 750, postgres: 1760,
};

/**
 * Print the estimated tool-description token overhead of an MCP config.
 *
 * Logs one line per configured server (servers missing from TOKEN_EST count
 * as 1000 tokens), the per-request total, and the monthly cost at
 * 100 requests/day for 30 days at $3 per million input tokens.
 *
 * @param {string} [configPath='.mcp.json'] Path to a JSON config file with an `mcpServers` object.
 * @returns {void}
 */
function auditMcp(configPath = '.mcp.json') {
  const DEFAULT_TOKENS = 1000;

  if (!fs.existsSync(configPath)) {
    console.log('Config not found');
    return;
  }
  const config = JSON.parse(fs.readFileSync(configPath, 'utf8'));
  const servers = config.mcpServers || {};
  let total = 0;
  for (const name of Object.keys(servers)) {
    // Own-property check: a server named e.g. "constructor" or "toString"
    // must not resolve to an inherited Object.prototype member.
    const known = Object.prototype.hasOwnProperty.call(TOKEN_EST, name);
    const tokens = known ? TOKEN_EST[name] : DEFAULT_TOKENS;
    total += tokens;
    console.log(` ${name}: ~${tokens.toLocaleString()} tokens`);
  }
  console.log(`Total: ~${total.toLocaleString()} tokens per request`);
  console.log(`Monthly cost (Sonnet, 100 req/day): $${(total * 100 * 30 * 3 / 1e6).toFixed(2)}`);
}
auditMcp();
预期输出
MCP Server Token Audit
==================================================
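github                     25 tools x 250 tok = 6,250 tokens
slack                      12 tools x 200 tok = 2,400 tokens
filesystem                 11 tools x 180 tok = 1,980 tokens
postgres                    8 tools x 220 tok = 1,760 tokens
puppeteer                   8 tools x 200 tok = 1,600 tokens
memory                      5 tools x 150 tok =   750 tokens
scavio                      3 tools x 250 tok =   750 tokens
brave-search                2 tools x 300 tok =   600 tokens
tavily                      2 tools x 280 tok =   560 tokens
sequential-thinking         1 tools x 400 tok =   400 tokens
TOTAL                                         17,050 tokens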
MCP Optimization Recommendations
==================================================
REPLACE brave-search -> Scavio
 Reason: Scavio covers web search + 5 more platforms
 Token savings: 600
REPLACE tavily -> Scavio
 Reason: Scavio provides similar web search at $0.005/credit
 Token savings: 560

Before: 12,540 tokens (6 servers)
After: 12,130 tokens (5 servers)
Saved: 410 tokens/request