Reddit WSB Backtesting Sentiment Pipeline
Backtest trading strategies against WSB sentiment signals. SERP API for fresh thread data, keyword matching as a lightweight stand-in for LLM sentiment extraction, NumPy for signal correlation.
You can backtest a Reddit sentiment trading strategy by pulling historical mentions of tickers from r/wallstreetbets via SERP API, scoring sentiment for each time period, and comparing those signals against actual stock price movements. The honest result: pure sentiment is a lagging indicator. But sentiment divergence from price action produces interesting signals worth investigating.
The Backtesting Framework
The pipeline works in three phases: collect historical Reddit sentiment data, fetch corresponding stock prices, and compute correlation between sentiment signals and actual returns.
import json
import math
import os
from datetime import datetime, timedelta

import requests
# Scavio API key, read from the environment at import time; a missing
# SCAVIO_API_KEY variable raises KeyError here, before any request is sent.
API_KEY = os.environ["SCAVIO_API_KEY"]
def get_historical_reddit_mentions(ticker: str, weeks_back: int = 12) -> list:
    """Pull weekly Reddit mention snapshots for a ticker.

    One search request is issued per week. Each request is restricted to its
    own seven-day window with ``after:``/``before:`` date operators, so the
    snapshots differ from week to week instead of repeating one result set.

    Args:
        ticker: Symbol to search for, e.g. ``"GME"``.
        weeks_back: Number of consecutive weekly windows to collect, ending
            with the week that contains today.

    Returns:
        A list of ``weeks_back`` dicts ordered oldest window first, so index
        ``i`` can be paired with the ``i``-th entry of a chronological price
        series. Each dict has ``week_offset`` (its list index), ``week_start``
        and ``week_end`` (ISO dates, end exclusive), ``mentions``,
        ``bullish``, ``bearish`` and ``sentiment_ratio``.

    Raises:
        requests.HTTPError: If the search API answers with a 4xx/5xx status.
    """
    bullish_words = ("calls", "moon", "bullish", "long", "buy the dip")
    bearish_words = ("puts", "crash", "bearish", "short", "sell")

    # Anchor on tomorrow so the newest window still includes today's posts.
    anchor = datetime.now().date() + timedelta(days=1)

    weekly_data = []
    for week in range(weeks_back):
        # Window `week` spans [window_start, window_end); week 0 is the oldest.
        window_end = anchor - timedelta(weeks=weeks_back - week - 1)
        window_start = window_end - timedelta(weeks=1)

        # NOTE(review): assumes the search backend forwards Google's
        # after:/before: operators unchanged -- confirm against the API docs.
        query = (
            f'"{ticker}" site:reddit.com/r/wallstreetbets '
            f"after:{window_start.isoformat()} before:{window_end.isoformat()}"
        )
        resp = requests.post(
            "https://api.scavio.dev/api/v1/search",
            headers={"x-api-key": API_KEY},
            json={"query": query, "num_results": 20},
            timeout=15,
        )
        # Fail loudly: an auth or quota error must not be counted as
        # "zero mentions this week".
        resp.raise_for_status()
        results = resp.json().get("results") or []

        # Lower-case each snippet once; a null snippet counts as empty text.
        snippets = [(r.get("snippet") or "").lower() for r in results]
        bullish = sum(any(w in s for w in bullish_words) for s in snippets)
        bearish = sum(any(w in s for w in bearish_words) for s in snippets)

        weekly_data.append({
            "week_offset": week,
            "week_start": window_start.isoformat(),
            "week_end": window_end.isoformat(),
            "mentions": len(results),
            "bullish": bullish,
            "bearish": bearish,
            # max(..., 1) avoids dividing by zero when nothing is bearish.
            "sentiment_ratio": round(bullish / max(bearish, 1), 2),
        })
    return weekly_data


# Fetch Price Data for Comparison
import yfinance as yf
def get_weekly_returns(ticker: str, weeks: int = 12) -> list:
    """Get weekly closing prices and week-over-week percentage returns.

    Args:
        ticker: Symbol to download, e.g. ``"NVDA"``.
        weeks: Number of weeks of history to request before today.

    Returns:
        A chronological list of dicts with ``week`` (1-based position),
        ``close`` (rounded to 2 decimals) and ``return_pct`` (percent change
        versus the previous weekly close, rounded to 2 decimals). The list is
        empty when no price data comes back.
    """
    end = datetime.now()
    # One extra week gives the first return a previous close to compare with.
    start = end - timedelta(weeks=weeks + 1)
    df = yf.download(
        ticker,
        start=start.strftime("%Y-%m-%d"),
        end=end.strftime("%Y-%m-%d"),
        interval="1wk",
    )
    if df is None or df.empty:
        return []

    close = df["Close"]
    # Newer yfinance releases return MultiIndex columns even for one ticker,
    # in which case df["Close"] is a one-column DataFrame, not a Series.
    if getattr(close, "ndim", 1) == 2:
        close = close.iloc[:, 0]
    # Drop weeks without a close so NaN cannot leak into the returns.
    closes = [float(c) for c in close.dropna().tolist()]

    returns = []
    for week, (prev, cur) in enumerate(zip(closes, closes[1:]), start=1):
        pct = round((cur - prev) / prev * 100, 2)
        returns.append({"week": week, "close": round(cur, 2), "return_pct": pct})
    return returns


# Correlation Analysis
import numpy as np
def _pearson(xs: list, ys: list):
    """Return Pearson's r rounded to 3 decimals, or None when it is undefined."""
    x = np.asarray(xs, dtype=float)
    y = np.asarray(ys, dtype=float)
    # Two points always correlate at exactly +/-1, so require at least three.
    if x.size < 3 or x.size != y.size:
        return None
    # A constant series has zero variance; corrcoef would divide by zero.
    if x.max() == x.min() or y.max() == y.min():
        return None
    r = float(np.corrcoef(x, y)[0, 1])
    return round(r, 3) if np.isfinite(r) else None


def analyze_correlation(sentiment: list, prices: list) -> dict:
    """Compare sentiment signals vs actual price returns.

    Both lists are truncated to the shorter length and paired index by index.

    Args:
        sentiment: Dicts that each carry a ``sentiment_ratio`` value.
        prices: Dicts that each carry a ``return_pct`` value.

    Returns:
        ``{"error": "Not enough data points"}`` when fewer than three pairs
        are available. Otherwise a dict with ``same_week_correlation``,
        ``predictive_correlation`` (sentiment of week *i* against the return
        of week *i + 1*), ``data_points`` and ``interpretation``. A
        correlation is ``None`` when it cannot be computed: fewer than three
        points, or one of the series is constant.
    """
    min_len = min(len(sentiment), len(prices))
    sent_scores = [s["sentiment_ratio"] for s in sentiment[:min_len]]
    price_returns = [p["return_pct"] for p in prices[:min_len]]
    if min_len < 3:
        return {"error": "Not enough data points"}

    # Same-week correlation
    same_week = _pearson(sent_scores, price_returns)
    # Lagged correlation: does this week's sentiment predict next week's return?
    lagged = _pearson(sent_scores[:-1], price_returns[1:])

    if same_week is None or lagged is None:
        # Do not hand an undefined value to the verdict helper; say so plainly.
        interpretation = (
            "Correlation undefined. Sentiment or returns did not vary enough "
            "across the sample to measure a relationship."
        )
    else:
        interpretation = interpret(same_week, lagged)

    return {
        "same_week_correlation": same_week,
        "predictive_correlation": lagged,
        "data_points": min_len,
        "interpretation": interpretation,
    }
def interpret(same: float, lagged: float) -> str:
    """Turn the lagged (predictive) correlation into a one-sentence verdict.

    Args:
        same: Same-week correlation. Currently unused; only ``lagged``
            drives the verdict.
        lagged: Correlation between one week's sentiment and the following
            week's return.

    Returns:
        A human-readable verdict. An undefined ``lagged`` value (``None`` or
        NaN) yields an explicit "Undefined" message rather than being
        reported as a weak-but-present signal.
    """
    # NaN fails every comparison below, so it must be caught up front.
    if lagged is None or math.isnan(lagged):
        return "Undefined. Correlation could not be computed from this sample."
    if abs(lagged) < 0.2:
        return "Weak predictive signal. Sentiment does not reliably predict next-week returns."
    if lagged > 0.4:
        return "Moderate positive signal. High sentiment weeks tend to precede gains."
    if lagged < -0.4:
        return "Contrarian signal. High sentiment weeks tend to precede drops."
    # Remaining case: 0.2 <= |lagged| <= 0.4.
    return "Inconclusive. Some signal but not reliable enough to trade on."


# Run the Full Backtest
def backtest_ticker(ticker: str, weeks: int = 12) -> dict:
    """Run the sentiment-vs-price backtest for one ticker and print a report.

    Args:
        ticker: Symbol to backtest.
        weeks: Number of weeks to cover; also the number of search credits
            used for the cost estimate.

    Returns:
        A dict with ``ticker``, ``sentiment``, ``prices`` and every key of
        the correlation analysis merged in at the top level. If the analysis
        could not run, that includes its ``error`` key.
    """
    print(f"Backtesting {ticker} over {weeks} weeks...")
    sentiment = get_historical_reddit_mentions(ticker, weeks)
    prices = get_weekly_returns(ticker, weeks)
    analysis = analyze_correlation(sentiment, prices)
    # Cost: 1 credit per sentiment query per week
    cost = weeks * 0.005
    print(f" API cost: $" + f"{cost:.2f} ({weeks} credits)")
    if "error" in analysis:
        # Show why there are no numbers instead of printing a blank line.
        print(f" Analysis skipped: {analysis['error']}")
    else:
        print(f" Same-week correlation: {analysis.get('same_week_correlation', 'N/A')}")
        print(f" Predictive correlation: {analysis.get('predictive_correlation', 'N/A')}")
        print(f" {analysis.get('interpretation', '')}")
    return {"ticker": ticker, "sentiment": sentiment, "prices": prices, **analysis}
# Run the backtest once for each of a handful of WSB-popular tickers.
tickers = ["NVDA", "TSLA", "GME", "PLTR", "AMD"]
results = list(map(backtest_ticker, tickers))

# Summary: one line per ticker with its lagged (predictive) correlation,
# or "N/A" when the analysis produced none.
for r in results:
    pred = r.get("predictive_correlation", "N/A")
    print(f"{r['ticker']}: predictive correlation = {pred}")


# What the Numbers Actually Show
If you run this on most WSB-popular tickers, you will find:
- Same-week correlation is moderate (0.3-0.5) because sentiment and prices react to the same news simultaneously
- Predictive (lagged) correlation is weak (under 0.2 for most tickers), which is consistent with sentiment lagging price action rather than leading it
- Meme stocks like GME show negative lagged correlation -- peak Reddit hype often coincides with or follows the top
- The useful signal is sudden sentiment spikes for tickers that are not currently in a news cycle
Cost and Limitations
A 12-week backtest on 5 tickers costs 60 credits ($0.30). A full 52-week backtest on 20 tickers costs 1,040 credits ($5.20). The main limitation is that SERP-based Reddit search returns what Google has indexed, not a complete archive. For true historical accuracy you would need Pushshift or a Reddit archive dataset. SERP gives you what was popular enough to get indexed, which biases toward high-engagement posts -- arguably the ones that matter most for sentiment analysis anyway.