Overview
This pipeline audits existing n8n workflows to find HTTP Request nodes that scrape websites and recommends replacing them with Scavio API calls. Export n8n workflow JSON, scan for HTTP Request nodes with HTML Extract downstream nodes, and flag them as scraping-dependent. For each flagged node, generate a replacement n8n HTTP Request configuration using Scavio's endpoint. The audit produces a migration plan with estimated reliability improvements.
Trigger
Manual trigger or monthly cron schedule
Schedule
Monthly or on-demand
Workflow Steps
Export n8n workflow JSON
Pull workflow definitions from n8n's API or export JSON files for all active workflows.
Scan for scraping-dependent nodes
Identify HTTP Request nodes followed by HTML Extract or similar parsing nodes.
Test current scraping endpoints
Execute each identified scraping URL to check if it still returns valid data or is blocked.
Generate Scavio replacement configs
For each scraping node, create a replacement HTTP Request node config pointing to Scavio API.
Output migration plan
Generate a report listing all scraping nodes, their current status, and the recommended replacement configuration.
Python Implementation
import json
from datetime import datetime, timezone
from pathlib import Path

import requests
# Placeholder Scavio API key. It is copied verbatim into every generated
# replacement config, so anything that stores the audit result stores the key.
API_KEY = "your_scavio_api_key"


def audit_workflow(workflow_json: dict) -> dict:
    """Audit an n8n workflow for scraping-dependent nodes.

    An HTTP Request node (type ``n8n-nodes-base.httpRequest``) is flagged when
    at least one node it connects to has ``"html"`` in its type
    (case-insensitive). Each flagged node is reported exactly once, together
    with a replacement HTTP Request configuration that targets the Scavio
    search endpoint.

    Args:
        workflow_json: Parsed workflow export. The keys read are ``name``,
            ``nodes`` and ``connections``; missing or null values are treated
            as empty.

    Returns:
        A dict with ``workflow`` (workflow name or ``"unknown"``),
        ``total_nodes``, ``scraping_nodes`` (number of flagged nodes),
        ``migrations`` (one entry per flagged node with ``node_name``, ``url``
        and ``replacement``) and ``audited_at`` (timezone-aware UTC timestamp
        in ISO 8601 format).
    """
    nodes = workflow_json.get("nodes") or []
    connections = workflow_json.get("connections") or {}

    # Index nodes by name once instead of scanning the list for every
    # connection. setdefault keeps the first node for a duplicated name,
    # which is what a linear first-match search would return.
    nodes_by_name: dict = {}
    for candidate in nodes:
        nodes_by_name.setdefault(candidate.get("name"), candidate)

    def feeds_html_node(source_name: str) -> bool:
        """Return True if any direct downstream node has an HTML-like type."""
        outputs = connections.get(source_name) or {}
        for output_slots in outputs.values():
            # Output slots without a connection may be null/empty in exports.
            for slot in output_slots or []:
                for target in slot or []:
                    target_node = nodes_by_name.get(target.get("node"))
                    if not target_node:
                        continue
                    if "html" in str(target_node.get("type") or "").lower():
                        return True
        return False

    scraping_nodes = []
    for node in nodes:
        if node.get("type") != "n8n-nodes-base.httpRequest":
            continue
        node_name = node.get("name", "")
        if not feeds_html_node(node_name):
            continue
        scraping_nodes.append({
            "node_name": node_name,
            "url": node.get("parameters", {}).get("url", "unknown"),
            "replacement": {
                "type": "n8n-nodes-base.httpRequest",
                "method": "POST",
                "url": "https://api.scavio.dev/api/v1/search",
                "headers": {"x-api-key": API_KEY},
                "body": {"platform": "google", "query": "{{$json.query}}"},
            },
        })

    return {
        "workflow": workflow_json.get("name", "unknown"),
        "total_nodes": len(nodes),
        "scraping_nodes": len(scraping_nodes),
        "migrations": scraping_nodes,
        # datetime.utcnow() is deprecated and naive; emit an explicit UTC offset.
        "audited_at": datetime.now(timezone.utc).isoformat(),
    }
def run():
    """Audit every exported workflow file in the current directory.

    Reads each ``n8n_workflow_*.json`` file, audits it with
    ``audit_workflow``, prints a one-line summary per workflow, and writes all
    results to ``n8n_audit_report.json``. A file may contain a single workflow
    object or a JSON array of workflow objects. Files that cannot be read or
    parsed are reported and skipped so that one bad export does not abort the
    whole audit. If nothing was audited, a hint is printed and no report is
    written.
    """
    audit_results = []
    # Sorted so the report order is stable regardless of directory listing order.
    for wf_path in sorted(Path(".").glob("n8n_workflow_*.json")):
        try:
            # Read as UTF-8 explicitly instead of relying on the platform
            # default encoding.
            exported = json.loads(wf_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            print(f" Skipping {wf_path}: {exc}")
            continue

        workflows = exported if isinstance(exported, list) else [exported]
        for wf in workflows:
            if not isinstance(wf, dict):
                print(f" Skipping non-workflow entry in {wf_path}")
                continue
            result = audit_workflow(wf)
            audit_results.append(result)
            print(f" {result['workflow']}: {result['scraping_nodes']} scraping nodes found")

    if not audit_results:
        print("No workflow files found. Export from n8n and save as n8n_workflow_*.json")
    else:
        Path("n8n_audit_report.json").write_text(
            json.dumps(audit_results, indent=2), encoding="utf-8"
        )
if __name__ == "__main__":
    run()

JavaScript Implementation
// Placeholder Scavio API key. It is copied verbatim into every generated
// replacement config, so anything that stores the audit result stores the key.
const API_KEY = "your_scavio_api_key";

/**
 * Audit an n8n workflow for scraping-dependent nodes.
 *
 * An HTTP Request node (type "n8n-nodes-base.httpRequest") is flagged when at
 * least one node it connects to has "html" in its type (case-insensitive).
 * Each flagged node is reported exactly once, together with a replacement
 * HTTP Request configuration that targets the Scavio search endpoint.
 *
 * @param {object} wf Parsed workflow export; reads `name`, `nodes`, `connections`.
 * @returns {{workflow: string, totalNodes: number, scrapingNodes: number,
 *            migrations: object[], auditedAt: string}} Audit summary.
 */
function auditWorkflow(wf) {
  const nodes = wf.nodes ?? [];
  const connections = wf.connections ?? {};

  // True when any direct downstream node of `node` has an HTML-like type.
  // Null/empty output slots (outputs without a connection) are skipped.
  const feedsHtmlNode = (node) => {
    const outputs = connections[node.name] ?? {};
    return Object.values(outputs).some((slots) =>
      (slots ?? []).some((slot) =>
        (slot ?? []).some((target) => {
          const targetNode = nodes.find((n) => n.name === target.node);
          return Boolean(targetNode?.type?.toLowerCase().includes("html"));
        })
      )
    );
  };

  const migrations = nodes
    .filter((node) => node.type === "n8n-nodes-base.httpRequest" && feedsHtmlNode(node))
    .map((node) => ({
      node: node.name,
      url: node.parameters?.url ?? "unknown",
      replacement: {
        type: "n8n-nodes-base.httpRequest",
        method: "POST",
        url: "https://api.scavio.dev/api/v1/search",
        headers: { "x-api-key": API_KEY },
        // Without a body the POST carries no search query.
        body: { platform: "google", query: "{{$json.query}}" },
      },
    }));

  return {
    workflow: wf.name ?? "unknown",
    totalNodes: nodes.length,
    scrapingNodes: migrations.length,
    migrations,
    auditedAt: new Date().toISOString(),
  };
}
// Demo run: an empty workflow object stands in for JSON exported from n8n.
const sampleWorkflow = {
  name: "My Workflow",
  nodes: [],
  connections: {},
};
const result = auditWorkflow(sampleWorkflow);
console.log(`${result.workflow}: ${result.scrapingNodes} scraping nodes to migrate`);

Platforms Used
Google web search with knowledge graph, People Also Ask (PAA), and AI overviews