LangGraph agents gain significant value when they combine persistent memory with live search grounding. Memory lets the agent recall past interactions and user preferences, while search grounding prevents hallucination by injecting real-time data. This tutorial builds a LangGraph agent that maintains conversation state across turns, uses the Scavio search API as a tool, and decides when to search versus when to rely on memory. The search tool costs $0.005 per call, so even chatty agents stay under a few dollars per month.
Prerequisites
- Python 3.10+ installed
- langgraph and langchain-openai packages installed
- OpenAI API key for the LLM
- Scavio API key for search grounding
Walkthrough
Step 1: Define the agent state schema
LangGraph uses typed state to manage conversation flow. Define a state that holds messages, search results, and a memory store.
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
import operator
class AgentState(TypedDict):
messages: Annotated[Sequence[BaseMessage], operator.add]
search_results: str
memory_context: strStep 2: Build the search tool node
Create a node that calls the Scavio API when the agent decides it needs current information. The node receives the state and returns updated search_results.
import requests, os
API_KEY = os.environ['SCAVIO_API_KEY']
def search_node(state: AgentState) -> dict:
last_msg = state['messages'][-1].content
resp = requests.post('https://api.scavio.dev/api/v1/search',
headers={'x-api-key': API_KEY, 'Content-Type': 'application/json'},
json={'query': last_msg, 'country_code': 'us'})
resp.raise_for_status()
results = resp.json().get('organic_results', [])[:5]
context = '\n'.join(f'- {r["title"]}: {r.get("snippet", "")}' for r in results)
return {'search_results': context}Step 3: Build the memory management node
Create a simple memory store that persists key facts from the conversation. The agent can reference these in future turns without re-searching.
memory_store = {}
def memory_node(state: AgentState) -> dict:
# Extract facts from the latest exchange
messages = state['messages']
if len(messages) >= 2:
user_msg = messages[-2].content if len(messages) >= 2 else ''
ai_msg = messages[-1].content if messages else ''
# Store key topic as memory
key = user_msg[:50]
memory_store[key] = ai_msg[:200]
# Build memory context from recent entries
recent = list(memory_store.items())[-5:]
ctx = '\n'.join(f'Previously discussed: {k} -> {v[:80]}' for k, v in recent)
return {'memory_context': ctx}Step 4: Build the LLM response node
The response node combines search results, memory context, and the user message to generate a grounded, context-aware answer.
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage, SystemMessage
llm = ChatOpenAI(model='gpt-4o', temperature=0)
def respond_node(state: AgentState) -> dict:
system = f"""You are a helpful assistant with web search and memory.
Memory from past conversations:
{state.get('memory_context', 'No prior context.')}
Current search results:
{state.get('search_results', 'No search performed.')}
Use search results for factual claims. Use memory to personalize responses."""
messages = [SystemMessage(content=system)] + list(state['messages'])
response = llm.invoke(messages)
return {'messages': [response]}Step 5: Assemble the graph with conditional routing
Wire the nodes into a LangGraph with a router that decides whether to search or reply directly. Questions about current events trigger search; follow-ups use memory.
from langgraph.graph import StateGraph, END
def should_search(state: AgentState) -> str:
    """Router: return 'search' for time-sensitive queries, else 'respond'."""
    text = state['messages'][-1].content.lower()
    # Heuristic trigger words suggesting the answer needs fresh data.
    triggers = ('latest', 'current', 'price', '2026', 'today', 'news', 'best')
    return 'search' if any(word in text for word in triggers) else 'respond'
graph = StateGraph(AgentState)
graph.add_node('search', search_node)
graph.add_node('respond', respond_node)
graph.add_node('memory', memory_node)
graph.set_entry_point('search')
graph.add_conditional_edges('search', should_search, {
'search': 'search',
'respond': 'respond'
})
graph.add_edge('search', 'respond')
graph.add_edge('respond', 'memory')
graph.add_edge('memory', END)
app = graph.compile()
print('Agent graph compiled with search + memory nodes')Python Example
import os, requests
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END
import operator
API_KEY = os.environ['SCAVIO_API_KEY']  # raises KeyError when the key is unset
llm = ChatOpenAI(model='gpt-4o', temperature=0)  # deterministic outputs
class AgentState(TypedDict):
    # Conversation history; the operator.add reducer appends node outputs.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Bullet-list context written by the search node.
    search_results: str
def search_node(state):
    """Ground the turn: fetch the top-5 Scavio results for the latest message.

    Returns a partial state update {'search_results': <bullet list>}.
    """
    last = state['messages'][-1].content
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': API_KEY, 'Content-Type': 'application/json'},
        json={'query': last, 'country_code': 'us'},
        timeout=15,  # avoid hanging the agent on a stalled request
    )
    # Surface HTTP failures instead of silently parsing an error body into
    # zero results (matches the main tutorial's search node).
    resp.raise_for_status()
    results = resp.json().get('organic_results', [])[:5]
    ctx = '\n'.join(f'- {r["title"]}: {r.get("snippet", "")}' for r in results)
    return {'search_results': ctx}
def respond_node(state):
    """Answer the user with the search context injected as a system message."""
    grounding = SystemMessage(
        content=f'Use these search results:\n{state.get("search_results", "")}'
    )
    conversation = [grounding, *state['messages']]
    return {'messages': [llm.invoke(conversation)]}
graph = StateGraph(AgentState)
graph.add_node('search', search_node)
graph.add_node('respond', respond_node)
graph.set_entry_point('search')
graph.add_edge('search', 'respond')
graph.add_edge('respond', END)
app = graph.compile()
result = app.invoke({'messages': [HumanMessage(content='Best Python frameworks 2026')], 'search_results': ''})
print(result['messages'][-1].content)JavaScript Example
// LangGraph is Python-only; this JS example shows the equivalent pattern
const API_KEY = process.env.SCAVIO_API_KEY;

/**
 * Fetch the top-5 Scavio organic results for `query` and format them as a
 * bullet-list grounding string.
 * @param {string} query - search query text
 * @returns {Promise<string>} newline-joined "- title: snippet" lines
 * @throws {Error} when the API responds with a non-2xx status
 */
async function searchGrounding(query) {
  const resp = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'x-api-key': API_KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, country_code: 'us' })
  });
  // Mirror the Python example's raise_for_status(): fail loudly on HTTP
  // errors instead of silently parsing an error body into zero results.
  if (!resp.ok) {
    throw new Error(`Scavio search failed: ${resp.status} ${resp.statusText}`);
  }
  const data = await resp.json();
  return (data.organic_results || []).slice(0, 5)
    .map(r => `- ${r.title}: ${r.snippet || ''}`).join('\n');
}
const memory = [];
async function agentTurn(userMessage) {
const context = await searchGrounding(userMessage);
const memoryCtx = memory.slice(-3).join('\n');
const prompt = `Memory:\n${memoryCtx}\n\nSearch:\n${context}\n\nQ: ${userMessage}`;
// Pass prompt to your LLM of choice
console.log(prompt);
memory.push(`User asked: ${userMessage}`);
}
agentTurn('Best Python frameworks 2026').catch(console.error);Expected Output
Agent graph compiled with search + memory nodes
Based on current search results, the best Python frameworks in 2026 are:
1. FastAPI - async-first, ideal for APIs and microservices
2. Django - full-featured for complex web applications
3. Flask - lightweight and flexible for smaller projects
...
Search grounding cost: $0.005 per agent turn