YaCy is a decentralized, peer-to-peer search engine that crawls and indexes the web without relying on any central server. Combined with llama.cpp for local LLM inference, you get a fully offline AI search pipeline with zero API costs. The tradeoff is index quality -- YaCy indexes what its peers share, which is far smaller than Google or Bing. This tutorial sets up YaCy, connects it to llama.cpp via the yacy_expert bridge, and adds a Scavio search fallback for queries where YaCy coverage is thin. Cost: $0 for local queries, $0.005 per Scavio fallback.
Prerequisites
- Docker installed
- At least 8GB RAM for llama.cpp
- A GGUF model file (e.g., Mistral 7B Q4)
- Python 3.9+ installed
- A Scavio API key for fallback searches
Walkthrough
Step 1: Start YaCy in Docker
Run YaCy as a Docker container. The admin interface runs on port 8090, and the JSON search API is served from the /yacysearch.json path on the same port.
# Pull and run YaCy
# Port 8090 serves both the admin UI and the JSON search API; the named
# volume persists the crawled index across container restarts.
docker run -d --name yacy \
-p 8090:8090 \
-v yacy_data:/opt/yacy_search_server/DATA \
yacy/yacy_search_server:latest
# Wait for startup (the first boot initializes the DATA directory)
echo 'Waiting for YaCy to initialize...'
sleep 15
# Test the search API
curl -s 'http://localhost:8090/yacysearch.json?query=python+programming&maximumRecords=3' | python3 -m json.tool | head -20
# Seed the index with some crawl targets
curl -s 'http://localhost:8090/Crawler_p.html?crawlingURL=https://docs.python.org&crawlingDepth=2&range=wide'

Step 2: Set up llama.cpp server
Run llama.cpp as an OpenAI-compatible API server. This handles the LLM inference for summarizing search results.
# Download and build llama.cpp (or use pre-built binary)
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp && make -j$(nproc)
# Start the server with your GGUF model
# --ctx-size bounds the prompt window; --n-gpu-layers offloads transformer
# layers to the GPU (set it to 0 on CPU-only machines)
./llama-server \
--model ~/models/mistral-7b-instruct-v0.3.Q4_K_M.gguf \
--host 0.0.0.0 \
--port 8080 \
--ctx-size 4096 \
--n-gpu-layers 35
# Test it
curl -s http://localhost:8080/v1/chat/completions \
-H 'Content-Type: application/json' \
-d '{"model": "local", "messages": [{"role": "user", "content": "Hello"}], "max_tokens": 50}'

Step 3: Build the yacy_expert bridge in Python
Create a Python script that queries YaCy, formats results as context, and sends them to llama.cpp for a grounded answer.
import requests, os
# Local YaCy JSON search endpoint.
YACY_URL = 'http://localhost:8090/yacysearch.json'
# llama.cpp OpenAI-compatible chat completions endpoint.
LLAMA_URL = 'http://localhost:8080/v1/chat/completions'
# Optional Scavio fallback key; empty string disables the paid fallback.
SCAVIO_KEY = os.environ.get('SCAVIO_API_KEY', '')
def yacy_search(query: str, count: int = 5) -> list:
    """Query the local YaCy peer and normalize its results.

    Returns a list of {'title', 'snippet', 'url'} dicts; empty when the
    index has no matches for the query.
    """
    params = {'query': query, 'maximumRecords': count, 'resource': 'global'}
    resp = requests.get(YACY_URL, params=params, timeout=10)
    channels = resp.json().get('channels', [{}])
    if channels:
        items = channels[0].get('items', [])
    else:
        items = []
    normalized = []
    for item in items:
        normalized.append({
            'title': item.get('title', ''),
            'snippet': item.get('description', ''),
            'url': item.get('link', ''),
        })
    return normalized
def scavio_fallback(query: str, count: int = 5) -> list:
    """Search Scavio when YaCy coverage is thin ($0.005 per call).

    Returns a list of {'title', 'snippet', 'url'} dicts; empty when no
    SCAVIO_API_KEY is configured, so the pipeline stays free and offline.
    """
    if not SCAVIO_KEY:
        return []
    resp = requests.post(
        'https://api.scavio.dev/api/v1/search',
        headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
        json={'query': query, 'country_code': 'us', 'num_results': count},
        # Fix: match yacy_search's timeout so a stalled remote call can
        # never hang the whole pipeline indefinitely.
        timeout=10,
    )
    return [{'title': r['title'], 'snippet': r.get('snippet', ''),
             'url': r['link']} for r in resp.json().get('organic_results', [])[:count]]
def search(query: str) -> list:
    """Search YaCy first; prepend Scavio results when coverage is thin."""
    hits = yacy_search(query)
    # Two or more local hits: good enough, stay free and offline.
    if len(hits) >= 2:
        return hits
    print('YaCy coverage thin, falling back to Scavio ($0.005)')
    return scavio_fallback(query) + hits
results = search('python asyncio tutorial')
for r in results:
    print(f' {r["title"][:60]}')

Step 4: Add LLM-powered answer generation
Send the search results to llama.cpp to generate a grounded, cited answer. The LLM only summarizes what the search found.
def ask(query: str) -> str:
    """Answer a question with llama.cpp, grounded in search results.

    The system prompt restricts the model to the retrieved snippets, so
    the generated answer carries [n] citations back to its sources.
    """
    hits = search(query)
    if not hits:
        return 'No results found in YaCy or fallback.'
    numbered = [
        f'[{idx + 1}] {hit["title"]}\n{hit["snippet"]}\nSource: {hit["url"]}'
        for idx, hit in enumerate(hits)
    ]
    context = '\n\n'.join(numbered)
    payload = {
        'model': 'local',
        'messages': [
            {'role': 'system', 'content': 'Answer using ONLY the search results below. Cite sources as [1], [2], etc.'},
            {'role': 'user', 'content': f'Search results:\n{context}\n\nQuestion: {query}'}
        ],
        'max_tokens': 512,
        # Low temperature keeps the summary close to the retrieved text.
        'temperature': 0.3
    }
    resp = requests.post(LLAMA_URL, json=payload, timeout=60)
    return resp.json()['choices'][0]['message']['content']
print(ask('How do I use asyncio gather in Python?'))

Python Example
# Condensed, copy-paste version of the bridge above.
import requests, os
# YaCy JSON search endpoint.
YACY = 'http://localhost:8090/yacysearch.json'
# llama.cpp OpenAI-compatible chat endpoint.
LLAMA = 'http://localhost:8080/v1/chat/completions'
# Optional Scavio fallback key; empty string disables the fallback.
SCAVIO_KEY = os.environ.get('SCAVIO_API_KEY', '')
def search(query, count=5):
    """Search YaCy; fall back to Scavio when fewer than 2 local hits.

    Returns YaCy/Scavio items normalized to 'title'/'description'/'link' keys.
    """
    # Fix: add a timeout so a stalled peer cannot hang the call forever.
    resp = requests.get(YACY, params={'query': query, 'maximumRecords': count},
                        timeout=10)
    # Fix: guard against 'channels' being present but empty, which would
    # raise IndexError on [0] in the original.
    channels = resp.json().get('channels') or [{}]
    items = channels[0].get('items', [])
    if len(items) < 2 and SCAVIO_KEY:
        resp = requests.post('https://api.scavio.dev/api/v1/search',
            headers={'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json'},
            json={'query': query, 'country_code': 'us', 'num_results': count},
            timeout=10)
        items = [{'title': r['title'], 'description': r.get('snippet', ''), 'link': r['link']}
                 for r in resp.json().get('organic_results', [])]
    return items
def ask(query):
    """Generate a cited answer from search results via llama.cpp."""
    hits = search(query)
    lines = []
    for i, hit in enumerate(hits):
        lines.append(f'[{i+1}] {hit.get("title","")}: {hit.get("description","")}')
    ctx = '\n'.join(lines)
    messages = [
        {'role': 'system', 'content': 'Answer from search results only. Cite [1],[2].'},
        {'role': 'user', 'content': f'{ctx}\n\nQ: {query}'},
    ]
    resp = requests.post(LLAMA, json={'model': 'local', 'messages': messages, 'max_tokens': 512})
    body = resp.json()
    return body['choices'][0]['message']['content']
print(ask('python asyncio gather example'))

JavaScript Example
// YaCy JSON search endpoint.
const YACY = 'http://localhost:8090/yacysearch.json';
// llama.cpp OpenAI-compatible chat endpoint.
const LLAMA = 'http://localhost:8080/v1/chat/completions';
// Optional Scavio fallback key; undefined disables the fallback.
const SCAVIO_KEY = process.env.SCAVIO_API_KEY;
// Search the local YaCy index; fall back to Scavio when coverage is thin.
// Resolves to an array of { title, description, link } items.
async function search(query, count = 5) {
  // Local YaCy index first -- free and offline.
  const params = `query=${encodeURIComponent(query)}&maximumRecords=${count}`;
  const yacyData = await (await fetch(`${YACY}?${params}`)).json();
  let items = yacyData.channels?.[0]?.items || [];
  // Fewer than two local hits: pay $0.005 for a Scavio fallback search.
  if (items.length < 2 && SCAVIO_KEY) {
    const scavioResp = await fetch('https://api.scavio.dev/api/v1/search', {
      method: 'POST',
      headers: { 'x-api-key': SCAVIO_KEY, 'Content-Type': 'application/json' },
      body: JSON.stringify({ query, country_code: 'us', num_results: count })
    });
    const scavioData = await scavioResp.json();
    items = (scavioData.organic_results || []).map(r => ({ title: r.title, description: r.snippet, link: r.link }));
  }
  return items;
}
// Ask llama.cpp for an answer grounded in the retrieved search results,
// then print the model's reply.
async function ask(query) {
  const results = await search(query);
  // Number the snippets so the model can cite them as [n].
  const ctx = results
    .map((r, i) => `[${i + 1}] ${r.title}: ${r.description}`)
    .join('\n');
  const payload = {
    model: 'local',
    messages: [
      { role: 'system', content: 'Answer from search results only.' },
      { role: 'user', content: `${ctx}\n\nQ: ${query}` }
    ],
    max_tokens: 512
  };
  const resp = await fetch(LLAMA, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload)
  });
  const data = await resp.json();
  console.log(data.choices[0].message.content);
}
ask('python asyncio gather example');

Expected Output
YaCy coverage thin, falling back to Scavio ($0.005)
Python asyncio.gather() documentation
Real Python: Async IO in Python
Stack Overflow: How to use asyncio.gather
Based on the search results, asyncio.gather() runs multiple coroutines
concurrently and waits for all to complete [1]. You pass awaitable objects
as arguments: results = await asyncio.gather(coro1(), coro2()) [2]...