Cloudflare blocks scrapers aggressively in 2026, and 'blocked by cloudflare for 3 days' is the top thread in r/vibecoding. The fix is to detect the Cloudflare challenge page early and route affected URLs to a managed API. This tutorial shows how to build the detector and fall back cleanly.
Prerequisites
- Python 3.8+
- requests library
- A Scavio API key (for fallback)
Walkthrough
Step 1: Identify Cloudflare response signatures
Cloudflare challenges return a 403 or 503 with specific HTML markers.
# Lowercase substrings searched for in the lowercased response body.
CF_MARKERS = [
    'cf-browser-verification',
    'cf_chl_opt',
    'checking your browser',
    'ray id',
]

# Step 2: Build the detector
Return True if the response has a 403 or 503 status, or if its body contains any of the Cloudflare markers.
def is_cloudflare_blocked(response):
    """Report whether a response looks like a Cloudflare block.

    A 403 or 503 status counts as blocked outright. Any other status is
    blocked only if the lowercased body contains one of ``CF_MARKERS``.
    """
    blocked_statuses = (403, 503)
    if response.status_code not in blocked_statuses:
        page = response.text.lower()
        for marker in CF_MARKERS:
            if marker in page:
                return True
        return False
    return True


# Step 3: Try direct scrape first
Attempt a direct fetch with a browser-like user-agent.
import requests
# Browser-like User-Agent sent with the direct request.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36'
    ),
}
def try_direct(url):
    """Fetch ``url`` directly; return the response, or None if blocked.

    The request is sent with ``HEADERS`` and a 10-second timeout. The
    response is returned only when ``is_cloudflare_blocked`` reports that
    it is not a block page.
    """
    response = requests.get(url, headers=HEADERS, timeout=10)
    if is_cloudflare_blocked(response):
        return None
    return response


# Step 4: Fall back to Scavio
When blocked, route through Scavio's managed extractor.
import os
def fallback_scavio(url, timeout=60):
    """Fetch ``url`` through the Scavio extract endpoint.

    Args:
        url: Page to extract.
        timeout: Seconds to wait for the Scavio API before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The decoded JSON body of the Scavio response.

    Raises:
        KeyError: If ``SCAVIO_API_KEY`` is not set in the environment.
        requests.HTTPError: If Scavio answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.post(
        'https://api.scavio.dev/api/v1/extract',
        headers={'x-api-key': os.environ['SCAVIO_API_KEY']},
        json={'url': url, 'render_js': True, 'bypass_cloudflare': True},
        timeout=timeout,
    )
    # Surface HTTP error statuses instead of returning whatever error
    # body the API sent as if it were a successful extraction.
    response.raise_for_status()
    return response.json()


# Step 5: Combine into a resilient fetcher
Try direct, fall back on detection.
def resilient_fetch(url):
    """Fetch ``url`` directly, falling back to Scavio when blocked.

    Returns:
        A dict with 'source' ('direct' or 'scavio') and 'content' (the
        page HTML; '' if the Scavio payload has no 'html' key).
    """
    direct = try_direct(url)
    # Compare against None explicitly: a requests.Response is falsy for
    # any 4xx/5xx status, so a plain truth test would send unblocked
    # error pages (e.g. a 404) to Scavio and log a false Cloudflare hit.
    if direct is not None:
        return {'source': 'direct', 'content': direct.text}
    print(f'Cloudflare detected on {url}, falling back to Scavio')
    payload = fallback_scavio(url)
    return {'source': 'scavio', 'content': payload.get('html', '')}


# Python Example
import os, requests
# Lowercase substrings searched for in the lowercased response body.
CF_MARKERS = [
    'cf-browser-verification',
    'cf_chl_opt',
    'checking your browser',
    'ray id',
]
# User-Agent sent with the direct request.
HEADERS = {'User-Agent': 'Mozilla/5.0'}
def is_cf(response):
    """Return True for a 403/503 status or a body containing a CF marker."""
    if response.status_code in (403, 503):
        return True
    for marker in CF_MARKERS:
        if marker in response.text.lower():
            return True
    return False
def fetch(url, scavio_timeout=60):
    """Return the HTML for ``url``, using Scavio if the direct fetch fails.

    The direct request is tried first. If it raises a ``requests`` error
    or ``is_cf`` flags the response, the URL is sent to the Scavio
    extract endpoint instead.

    Args:
        url: Page to fetch.
        scavio_timeout: Seconds to wait for the Scavio API. Without a
            timeout ``requests`` can block indefinitely.

    Returns:
        The page HTML; '' if the Scavio payload has no 'html' key.

    Raises:
        KeyError: If ``SCAVIO_API_KEY`` is not set in the environment.
        requests.HTTPError: If Scavio answers with a 4xx/5xx status.
        requests.RequestException: If the Scavio request itself fails.
    """
    try:
        direct = requests.get(url, headers=HEADERS, timeout=10)
    except requests.RequestException:
        # Only network-level failures trigger the fallback; programming
        # errors are no longer silently swallowed.
        direct = None
    if direct is not None and not is_cf(direct):
        return direct.text
    extracted = requests.post(
        'https://api.scavio.dev/api/v1/extract',
        headers={'x-api-key': os.environ['SCAVIO_API_KEY']},
        json={'url': url, 'render_js': True, 'bypass_cloudflare': True},
        timeout=scavio_timeout,
    )
    # Surface HTTP error statuses instead of returning '' for them.
    extracted.raise_for_status()
    return extracted.json().get('html', '')
print(fetch('https://example-cloudflare-site.com')[:200])

JavaScript Example
// Lowercase substrings matched against the lowercased response body.
const CF_MARKERS = [
  'cf-browser-verification',
  'cf_chl_opt',
  'checking your browser',
  'ray id',
];
/**
 * Fetch `url` directly and fall back to the Scavio extractor when the
 * direct attempt throws, returns a non-2xx status, or its body contains
 * one of CF_MARKERS.
 *
 * @param {string} url - Page to fetch.
 * @returns {Promise<string>} The page HTML (for the fallback, the `html`
 *   field of the Scavio JSON response).
 */
async function fetchResilient(url) {
  try {
    const direct = await fetch(url, { headers: { 'User-Agent': 'Mozilla/5.0' } });
    const html = await direct.text();
    // Lowercase only the copy used for marker matching; returning the
    // lowercased text would corrupt case-sensitive page content.
    const haystack = html.toLowerCase();
    if (direct.ok && !CF_MARKERS.some(marker => haystack.includes(marker))) return html;
  } catch {
    // Best effort: a failed direct request falls through to Scavio.
  }
  const extracted = await fetch('https://api.scavio.dev/api/v1/extract', {
    method: 'POST',
    headers: { 'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ url, render_js: true, bypass_cloudflare: true })
  });
  return (await extracted.json()).html;
}
console.log(await fetchResilient('https://example-cloudflare-site.com'));

Expected Output
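When the target URL is not blocked, the direct fetch succeeds and returns the HTML. When it is blocked, the Step 5 fetcher (`resilient_fetch`) prints a 'Cloudflare detected' message and Scavio's extractor returns the page content instead. The compact Python and JavaScript examples fall back to Scavio silently.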