Add negative filters to a B2B search pipeline by defining exclusion rules that remove irrelevant results before they reach your CRM or outreach tool. Without negative filtering, B2B search pipelines produce 30-50% noise: job boards, news aggregators, directories, and competitors clog your lead list. This tutorial builds a post-processing layer that filters search results by domain blocklist, keyword exclusion, and content signals, ensuring only qualified leads pass through.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
- An existing B2B search pipeline or lead list
Walkthrough
Step 1: Define your negative filter rules
Set up blocklists for domains, keywords, and URL patterns that indicate non-lead results.
# HTTP client for the Scavio API plus access to environment variables.
import os

import requests

# Scavio API key; raises KeyError immediately if the variable is unset,
# which is preferable to failing later with a 401 from the API.
API_KEY = os.environ['SCAVIO_API_KEY']
# Domains that never yield B2B leads: social networks, job boards,
# review aggregators, and reference sites.
BLOCKED_DOMAINS = {
    'linkedin.com',
    'indeed.com',
    'glassdoor.com',
    'crunchbase.com',
    'wikipedia.org',
    'reddit.com',
    'youtube.com',
    'medium.com',
    'g2.com',
    'capterra.com',
}

# Lowercase phrases that flag a result as job-board or review-site content
# rather than a company page (matched against lowercased title + snippet).
NEGATIVE_KEYWORDS = [
    'job posting',
    'careers',
    'hiring',
    'salary',
    'review site',
    'comparison chart',
    'free template',
]
BLOCKED_URL_PATTERNS = ['/careers', '/jobs', '/hiring', '/press-release']
Step 2: Build the filter functions
Create filter functions that check each result against domain, keyword, and URL pattern rules.
from urllib.parse import urlparse


def is_blocked_domain(url: str, blocked_domains=None) -> bool:
    """Return True if the URL's host is a blocked domain or a subdomain of one.

    Fixes two defects in naive substring matching:
    - 'www.' is stripped only as a prefix, not anywhere in the host;
    - hosts like 'notlinkedin.com' no longer match 'linkedin.com' — only
      exact hosts and true subdomains (e.g. 'jobs.linkedin.com') are blocked.

    blocked_domains defaults to the module-level BLOCKED_DOMAINS set.
    """
    if blocked_domains is None:
        blocked_domains = BLOCKED_DOMAINS
    host = urlparse(url).netloc.lower()
    if host.startswith('www.'):
        host = host[4:]
    return any(host == d or host.endswith('.' + d) for d in blocked_domains)
def has_negative_keyword(title: str, snippet: str, negatives=None) -> bool:
    """Return True if any negative keyword appears in the title or snippet.

    Matching is case-insensitive: title and snippet are lowercased, so the
    keywords themselves are expected to be lowercase.

    negatives defaults to the module-level NEGATIVE_KEYWORDS list; passing an
    explicit list lets callers tune rules per query.
    """
    if negatives is None:
        negatives = NEGATIVE_KEYWORDS
    text = f'{title} {snippet}'.lower()
    return any(neg in text for neg in negatives)
def has_blocked_url_pattern(url: str, patterns=None) -> bool:
    """Return True if the URL path contains a blocked substring (e.g. '/careers').

    The path is lowercased before matching, so patterns are expected to be
    lowercase. patterns defaults to the module-level BLOCKED_URL_PATTERNS list.
    """
    if patterns is None:
        patterns = BLOCKED_URL_PATTERNS
    path = urlparse(url).path.lower()
    return any(pattern in path for pattern in patterns)
def is_valid_lead(result: dict) -> bool:
url = result.get('link', '')
title = result.get('title', '')
snippet = result.get('snippet', '')
if is_blocked_domain(url): return False
if has_negative_keyword(title, snippet): return False
if has_blocked_url_pattern(url): return False
return TrueStep 3: Apply filters to search results
Search for B2B leads and apply all negative filters, reporting how many results were filtered out.
def filtered_search(query: str) -> dict:
    """Run a Google search via the Scavio API and drop results that fail any filter.

    Returns a summary dict: query, total/valid/filtered counts, and the
    surviving leads as {'title', 'url', 'snippet'} dicts.

    Raises requests.HTTPError on a non-2xx API response — previously an error
    body was silently parsed as if it contained zero organic results.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY},
        json={'platform': 'google', 'query': query},
        timeout=15,
    )
    resp.raise_for_status()  # fail loudly instead of treating an error page as "no results"
    results = resp.json().get('organic_results', [])
    valid = [r for r in results if is_valid_lead(r)]
    return {
        'query': query,
        'total': len(results),
        'valid': len(valid),
        'filtered': len(results) - len(valid),
        'leads': [
            {'title': r['title'], 'url': r['link'], 'snippet': r.get('snippet', '')}
            for r in valid
        ],
    }
result = filtered_search('martech companies series a 2026')
print(f"{result['valid']}/{result['total']} results passed filters")
for lead in result['leads'][:5]:
print(f" {lead['title']}")Step 4: Log filtered results for rule tuning
Save filtered-out results separately so you can review them and adjust your rules over time.
def search_with_logging(query: str, log_file: str = 'filter_log.jsonl') -> dict:
    """Search, filter, and append rejected results to a JSONL log for rule tuning.

    Each rejected result is tagged with the first rule that rejected it
    ('domain', 'keyword', or 'url_pattern'), mirroring the check order inside
    is_valid_lead, so blocklists can be reviewed and adjusted over time.

    Returns {'leads': [...], 'rejected': [...]}.
    Raises requests.HTTPError on a non-2xx API response.
    """
    import json
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY},
        json={'platform': 'google', 'query': query},
        timeout=15,
    )
    resp.raise_for_status()  # surface API errors instead of logging garbage
    results = resp.json().get('organic_results', [])
    valid, rejected = [], []
    for r in results:
        if is_valid_lead(r):
            valid.append(r)
            continue
        # Attribute the rejection to the first matching rule (same order as
        # is_valid_lead); anything left over must be the URL-pattern rule.
        if is_blocked_domain(r.get('link', '')):
            reason = 'domain'
        elif has_negative_keyword(r.get('title', ''), r.get('snippet', '')):
            reason = 'keyword'
        else:
            reason = 'url_pattern'
        rejected.append({'title': r.get('title', ''), 'url': r.get('link', ''), 'reason': reason})
    # Append one JSON object per query so the log stays greppable over time.
    with open(log_file, 'a', encoding='utf-8') as f:
        f.write(json.dumps({'query': query, 'rejected': rejected}) + '\n')
    print(f'{query}: {len(valid)} valid, {len(rejected)} rejected')
    return {'leads': valid, 'rejected': rejected}
search_with_logging('fintech startups hiring engineers 2026')
Python Example
import requests, os
from urllib.parse import urlparse

# API-key header sent with every Scavio request.
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
# Hosts that never produce B2B leads.
BLOCKED = {'linkedin.com', 'indeed.com', 'glassdoor.com', 'wikipedia.org'}


def filtered_search(query):
    """Search via Scavio and keep only results whose host is not blocked."""
    payload = {'platform': 'google', 'query': query}
    response = requests.post('https://api.scavio.dev/api/v1/search', headers=H, json=payload)
    organic = response.json().get('organic_results', [])
    kept = []
    for item in organic:
        host = urlparse(item.get('link', '')).netloc
        if not any(domain in host for domain in BLOCKED):
            kept.append(item)
    print(f'{len(kept)}/{len(organic)} passed filters')
    return kept
filtered_search('martech companies series a 2026')
JavaScript Example
// Headers for every Scavio request; the API key comes from the environment.
const H = {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'};
// Hosts that never produce B2B leads.
const BLOCKED = ['linkedin.com', 'indeed.com', 'glassdoor.com'];

// Search via Scavio, then drop any result whose link matches a blocked host.
async function filteredSearch(query) {
  const response = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: H,
    body: JSON.stringify({platform: 'google', query}),
  });
  const payload = await response.json();
  const results = payload.organic_results || [];
  const valid = results.filter((item) => {
    return !BLOCKED.some((host) => item.link?.includes(host));
  });
  console.log(`${valid.length}/${results.length} passed filters`);
  return valid;
}
filteredSearch('martech companies series a 2026');
Expected Output
A filtered B2B search pipeline that removes noise results by domain blocklist, negative keywords, and URL patterns, with logging for rule tuning.