Chatbot Architecture Patterns (Beginner)
Before writing a single line of chatbot code, you need to choose the right architecture. This lesson covers the three dominant patterns — intent-based, LLM-based, and hybrid — with honest trade-offs, production considerations, and a reference architecture you can adapt for any chatbot project.
Three Architecture Patterns
| Pattern | How It Works | Best For | Latency | Cost per Message |
|---|---|---|---|---|
| Intent-Based (NLU) | Classify user intent → extract slots → execute handler | Structured workflows (order status, booking) | 50-200ms | ~$0.001 |
| LLM-Based | Send conversation history to LLM → generate response | Open-ended Q&A, creative tasks | 1-10s | $0.01-0.10 |
| Hybrid | Route by confidence: high-confidence intents go to handlers, fallback to LLM | Production systems needing both speed and flexibility | Varies | $0.002-0.05 |
Intent-Based Architecture
The traditional approach used by Dialogflow, Lex, and Rasa. Fast, predictable, and cheap — but limited to predefined flows.
# Intent-based chatbot router (production pattern)
import re
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional
@dataclass
class Intent:
name: str
patterns: List[str] # Training phrases
handler: Callable # Function to execute
required_slots: List[str] # Parameters to extract
confidence_threshold: float = 0.85
@dataclass
class ConversationState:
session_id: str
current_intent: Optional[str] = None
slots: Dict[str, str] = None
turn_count: int = 0
context: Dict = None
def __post_init__(self):
self.slots = self.slots or {}
self.context = self.context or {}
class IntentRouter:
"""Production intent router with slot filling."""
def __init__(self):
self.intents: Dict[str, Intent] = {}
self.sessions: Dict[str, ConversationState] = {}
def register_intent(self, intent: Intent):
self.intents[intent.name] = intent
def classify(self, text: str) -> tuple[str, float]:
"""Classify user message to intent + confidence.
In production, replace with a trained NLU model (Rasa, BERT, etc.)."""
best_intent, best_score = None, 0.0
text_lower = text.lower()
for name, intent in self.intents.items():
for pattern in intent.patterns:
if pattern.lower() in text_lower:
score = len(pattern) / len(text_lower)
if score > best_score:
best_intent, best_score = name, min(score, 1.0)
return best_intent, best_score
def extract_slots(self, text: str, intent: Intent) -> Dict[str, str]:
"""Extract required slots from user message."""
slots = {}
# Example: extract order ID pattern
order_match = re.search(r'ORD-\d{6}', text)
if order_match and 'order_id' in intent.required_slots:
slots['order_id'] = order_match.group()
# Example: extract email
email_match = re.search(r'[\w.-]+@[\w.-]+\.\w+', text)
if email_match and 'email' in intent.required_slots:
slots['email'] = email_match.group()
return slots
def process(self, session_id: str, message: str) -> str:
# Get or create session
if session_id not in self.sessions:
self.sessions[session_id] = ConversationState(session_id=session_id)
state = self.sessions[session_id]
state.turn_count += 1
# If we're in slot-filling mode, try to fill missing slots
if state.current_intent:
intent = self.intents[state.current_intent]
new_slots = self.extract_slots(message, intent)
state.slots.update(new_slots)
missing = [s for s in intent.required_slots if s not in state.slots]
if missing:
return f"I still need your {missing[0]}. Could you provide it?"
# All slots filled - execute handler
response = intent.handler(state.slots)
state.current_intent = None
state.slots = {}
return response
# Classify new intent
intent_name, confidence = self.classify(message)
if intent_name and confidence >= self.intents[intent_name].confidence_threshold:
intent = self.intents[intent_name]
slots = self.extract_slots(message, intent)
missing = [s for s in intent.required_slots if s not in slots]
if missing:
state.current_intent = intent_name
state.slots = slots
return f"Sure, I can help with that. What is your {missing[0]}?"
return intent.handler(slots)
return "I'm not sure I understand. Could you rephrase that?"
# --- Usage Example ---
def handle_order_status(slots: Dict) -> str:
return f"Order {slots['order_id']}: Shipped, arriving tomorrow."
def handle_refund(slots: Dict) -> str:
return f"Refund initiated for order {slots['order_id']}. Check {slots['email']} for confirmation."
router = IntentRouter()
router.register_intent(Intent(
name="order_status",
patterns=["where is my order", "track order", "order status"],
handler=handle_order_status,
required_slots=["order_id"]
))
router.register_intent(Intent(
name="refund",
patterns=["refund", "return my order", "money back"],
handler=handle_refund,
required_slots=["order_id", "email"]
))
LLM-Based Architecture
Send the full conversation history to an LLM. Simple to build, handles open-ended queries well, but requires careful prompt engineering and cost management.
# LLM-based chatbot with structured system prompt
import openai
from typing import List, Dict
class LLMChatbot:
"""Production LLM chatbot with conversation management."""
def __init__(self, model: str = "gpt-4o", max_history: int = 20):
self.client = openai.OpenAI()
self.model = model
self.max_history = max_history
self.sessions: Dict[str, List[Dict]] = {}
self.system_prompt = """You are a customer support agent for Acme Corp.
RULES:
1. Only answer questions about Acme products and services
2. Never make up order information - always use the lookup tool
3. If you cannot help, offer to connect to a human agent
4. Keep responses under 3 sentences unless the user asks for detail
5. Never share internal policies or pricing formulas
TONE: Professional, friendly, concise."""
def get_history(self, session_id: str) -> List[Dict]:
if session_id not in self.sessions:
self.sessions[session_id] = []
return self.sessions[session_id]
def trim_history(self, history: List[Dict]) -> List[Dict]:
"""Keep conversation within context window budget."""
if len(history) > self.max_history:
# Always keep system prompt context, trim oldest user/assistant pairs
return history[-(self.max_history):]
return history
def chat(self, session_id: str, user_message: str) -> str:
history = self.get_history(session_id)
history.append({"role": "user", "content": user_message})
history = self.trim_history(history)
messages = [{"role": "system", "content": self.system_prompt}] + history
response = self.client.chat.completions.create(
model=self.model,
messages=messages,
temperature=0.3, # Lower = more consistent
max_tokens=500, # Cap response length
top_p=0.9
)
assistant_msg = response.choices[0].message.content
history.append({"role": "assistant", "content": assistant_msg})
self.sessions[session_id] = history
return assistant_msg
Hybrid Architecture (Recommended for Production)
Most production chatbots use a hybrid: route high-confidence intents to fast deterministic handlers, and fall back to an LLM for everything else.
# Hybrid router: intent-first with LLM fallback
class HybridChatbot:
"""Production hybrid: intent router + LLM fallback."""
def __init__(self, intent_router: IntentRouter, llm_chatbot: LLMChatbot,
confidence_threshold: float = 0.80):
self.intent_router = intent_router
self.llm = llm_chatbot
self.confidence_threshold = confidence_threshold
def process(self, session_id: str, message: str) -> dict:
# Step 1: Try intent classification
intent_name, confidence = self.intent_router.classify(message)
if intent_name and confidence >= self.confidence_threshold:
response = self.intent_router.process(session_id, message)
return {
"response": response,
"source": "intent",
"intent": intent_name,
"confidence": confidence,
"latency_budget": "fast" # typically <200ms
}
# Step 2: Fall back to LLM
response = self.llm.chat(session_id, message)
return {
"response": response,
"source": "llm",
"intent": None,
"confidence": 0.0,
"latency_budget": "standard" # typically 1-5s
}
Session and Multi-Turn Context Management
Every production chatbot needs session management. Sessions track who the user is, what they've said, and where they are in a conversation flow.
# Production session store (Redis-backed)
import json
import time
from typing import Optional
class SessionStore:
"""Redis-backed session store for production chatbots."""
def __init__(self, redis_client, ttl_seconds: int = 3600):
self.redis = redis_client
self.ttl = ttl_seconds
def get_session(self, session_id: str) -> Optional[dict]:
data = self.redis.get(f"chat:session:{session_id}")
if data:
session = json.loads(data)
session["last_accessed"] = time.time()
self.save_session(session_id, session)
return session
return None
def create_session(self, session_id: str, user_id: str,
channel: str) -> dict:
session = {
"session_id": session_id,
"user_id": user_id,
"channel": channel,
"created_at": time.time(),
"last_accessed": time.time(),
"turn_count": 0,
"messages": [],
"context": {}, # Extracted entities, preferences
"active_intent": None,
"slots": {}
}
self.save_session(session_id, session)
return session
def save_session(self, session_id: str, session: dict):
self.redis.setex(
f"chat:session:{session_id}",
self.ttl,
json.dumps(session)
)
def add_message(self, session_id: str, role: str, content: str,
metadata: dict = None):
session = self.get_session(session_id)
if session:
session["messages"].append({
"role": role,
"content": content,
"timestamp": time.time(),
"metadata": metadata or {}
})
session["turn_count"] += 1
self.save_session(session_id, session)
Lilly Tech Systems