Ground a local LLM with real-time news by detecting when a user query relates to current events, searching for recent news through the search API, and injecting the results into the model's context window before generation. Local LLMs like Llama and Mistral have static knowledge cutoffs and cannot answer questions about recent events. News grounding solves this by providing current information at inference time, without fine-tuning or retraining. The search call adds minimal latency and prevents the hallucinated news stories that local LLMs generate when asked about events they have no data on.
Prerequisites
- Python 3.8+ installed
- requests library installed
- A Scavio API key from scavio.dev
- A local LLM running (via Ollama, llama.cpp, or similar)
Walkthrough
Step 1: Detect news-worthy queries
Build a classifier that identifies queries needing current news data.
# Tutorial setup: stdlib os plus the third-party requests HTTP client.
import os, requests
# Read once at import time; raises KeyError if SCAVIO_API_KEY is unset (fail fast).
API_KEY = os.environ['SCAVIO_API_KEY']
# Keyword phrases that strongly suggest a query is about current events.
NEWS_SIGNALS = [
    'latest', 'recent', 'today', 'this week', 'breaking',
    'update', 'announcement', 'just happened', 'new release',
    'election', 'earnings', 'ipo', 'acquisition', 'merger',
    'what happened', 'current', 'news about',
]
# Explicit time references that also mark a query as time-sensitive.
TIME_ENTITIES = ['2025', '2026', 'yesterday', 'last week', 'this month']

def needs_news(query: str) -> bool:
    """Heuristically decide whether *query* asks about current events.

    Returns True when the lowercased query contains a news-signal phrase,
    a time-sensitive entity, or looks like a wh-question (a '?' plus one
    of who/what/when/where). Purely string-based; no network calls.
    """
    text = query.lower()
    has_signal = any(phrase in text for phrase in NEWS_SIGNALS)
    has_time_ref = any(entity in text for entity in TIME_ENTITIES)
    is_wh_question = '?' in query and any(
        wh in text for wh in ('who', 'what', 'when', 'where'))
    return has_signal or has_time_ref or is_wh_question
# Quick sanity checks: a news-worthy query vs. an evergreen one.
print(needs_news('What happened with the OpenAI announcement?')) # True
print(needs_news('How do Python list comprehensions work?')) # False
print(needs_news('Latest AI agent frameworks 2026')) # True

Step 2: Search for current news
Query the search API for recent news articles related to the user's question.
def search_news(query: str, limit: int = 5) -> list:
    """Fetch recent news results for *query* via the Scavio search API.

    Args:
        query: The user's question or topic.
        limit: Maximum number of results to return (default 5).

    Returns:
        A list of dicts with 'title', 'snippet', 'source', and 'url' keys;
        empty when the API returns no organic results.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within 15 seconds.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY},
        # 'news 2026' biases the engine toward current-year coverage.
        json={'platform': 'google', 'query': f'{query} news 2026'},
        timeout=15,
    )
    # Fail loudly on HTTP errors instead of parsing an error body as JSON
    # and silently returning [] — the original skipped this check.
    resp.raise_for_status()
    results = resp.json().get('organic_results', [])
    return [
        {
            'title': r.get('title', ''),
            'snippet': r.get('snippet', ''),
            'source': r.get('source', ''),
            'url': r.get('link', ''),
        }
        for r in results[:limit]
    ]
news = search_news('latest AI agent frameworks')
for n in news:
print(f" {n['source']}: {n['title'][:50]}")

Step 3: Format context for LLM
Structure the news results as a context block that fits naturally in the LLM prompt.
def format_news_context(news: list) -> str:
    """Render up to three news items as a prompt-ready context block.

    Returns '' for an empty list so callers can skip injection entirely.
    Each item contributes a numbered title line plus optional Source and
    Summary lines (summaries truncated to 200 chars), followed by usage
    instructions for the model.
    """
    if not news:
        return ''
    lines = ['CURRENT NEWS CONTEXT (live search results):', '']
    for rank, item in enumerate(news[:3], 1):
        lines.append(f'{rank}. {item["title"]}')
        source = item.get('source')
        if source:
            lines.append(f' Source: {source}')
        snippet = item.get('snippet')
        if snippet:
            lines.append(f' Summary: {snippet[:200]}')
        lines.append('')
    lines += [
        'Use the above current information to answer the question accurately.',
        'Cite sources when referencing specific news items.',
    ]
    return '\n'.join(lines)
# Build the context block from the results fetched in Step 2.
context = format_news_context(news)
print(context[:400])

Step 4: Inject into LLM prompt
Build the complete prompt with news context injected before the user query.
def grounded_prompt(query: str, system: str = 'You are a helpful assistant.') -> str:
    """Assemble the full LLM prompt, injecting live news context when needed.

    Sections (system message, optional news block, user turn, assistant cue)
    are joined with blank lines. search_news is only invoked for queries the
    classifier flags, so evergreen questions add no network latency.
    """
    sections = [system]
    if needs_news(query):
        block = format_news_context(search_news(query))
        if block:
            sections.append(block)
    sections.extend([f'User: {query}', 'Assistant:'])
    return '\n\n'.join(sections)
# Example with news grounding:
# (triggers a live search call; the prompt includes the news context block)
prompt = grounded_prompt('What are the latest AI agent frameworks in 2026?')
print(f'Prompt length: {len(prompt)} chars')
print(prompt[:500])
# Example without grounding:
# (classifier returns False, so no search call and a much shorter prompt)
prompt_no_news = grounded_prompt('How do Python decorators work?')
print(f'\nNo-news prompt length: {len(prompt_no_news)} chars')

Step 5: Test grounding quality
Verify the grounding pipeline produces relevant, current context for news queries.
def test_grounding():
    """Smoke-test the classifier against labeled queries and print PASS/FAIL.

    For every query the classifier flags, also run a live search to confirm
    the pipeline returns news items end to end.
    """
    cases = [
        ('Latest Python release', True),
        ('Python list comprehension syntax', False),
        ('Who won the latest tech IPO?', True),
        ('How to use git rebase', False),
        ('AI regulation news 2026', True),
    ]
    for query, expected_news in cases:
        detected = needs_news(query)
        if detected == expected_news:
            status = 'PASS'
        else:
            status = 'FAIL'
        print(f'[{status}] "{query}" -> needs_news={detected} (expected={expected_news})')
        if not detected:
            continue
        items = search_news(query)
        print(f' Found {len(items)} news items')
test_grounding()

Python Example
# Minimal standalone version of the pipeline for quick copy/paste.
import requests, os
# Shared auth header for all Scavio API calls.
H = {'x-api-key': os.environ['SCAVIO_API_KEY']}
def news_context(query):
    """Return up to three 'title: snippet' lines of fresh news for *query*.

    Raises requests.HTTPError on API error responses and requests.Timeout
    if the API does not answer within 15 seconds.
    """
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search', headers=H,
        json={'platform': 'google', 'query': f'{query} news 2026'},
        # The original had no timeout, so a stalled API would hang the
        # whole pipeline; 15s matches the Step 2 implementation.
        timeout=15,
    )
    resp.raise_for_status()  # surface 4xx/5xx instead of mis-parsing an error body
    news = resp.json().get('organic_results', [])[:3]
    return '\n'.join(f"{n.get('title', '')}: {n.get('snippet', '')[:80]}" for n in news)
print(news_context('latest AI frameworks'))

JavaScript Example
// Shared auth + content-type headers for all Scavio API calls.
const H = {'x-api-key': process.env.SCAVIO_API_KEY, 'Content-Type': 'application/json'};

// Return up to three "title: snippet" lines of fresh news for `query`.
// Throws on non-2xx responses (the original parsed error bodies as results)
// and falls back to '' for missing titles so the output never shows the
// literal string "undefined". Snippets are capped at 80 chars.
async function newsContext(query) {
  const r = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST', headers: H,
    body: JSON.stringify({platform: 'google', query: `${query} news 2026`})
  });
  if (!r.ok) throw new Error(`Scavio search failed: HTTP ${r.status}`);
  const news = (await r.json()).organic_results || [];
  return news.slice(0, 3).map(n => `${n.title || ''}: ${(n.snippet || '').slice(0, 80)}`).join('\n');
}
newsContext('latest AI frameworks').then(console.log);

Expected Output
A local LLM grounding pipeline that detects news-worthy queries, fetches current news via search API, and injects real-time context into the model prompt.